diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index ef3fba37817daa5..c12217d549479bc 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -2577,6 +2577,7 @@ struct CFISnapshot {
     case MCCFIInstruction::OpAdjustCfaOffset:
     case MCCFIInstruction::OpWindowSave:
     case MCCFIInstruction::OpNegateRAState:
+    case MCCFIInstruction::OpNegateRAStateWithPC:
     case MCCFIInstruction::OpLLVMDefAspaceCfa:
     case MCCFIInstruction::OpLabel:
       llvm_unreachable("unsupported CFI opcode");
@@ -2715,6 +2716,7 @@ struct CFISnapshotDiff : public CFISnapshot {
     case MCCFIInstruction::OpAdjustCfaOffset:
     case MCCFIInstruction::OpWindowSave:
     case MCCFIInstruction::OpNegateRAState:
+    case MCCFIInstruction::OpNegateRAStateWithPC:
     case MCCFIInstruction::OpLLVMDefAspaceCfa:
     case MCCFIInstruction::OpLabel:
       llvm_unreachable("unsupported CFI opcode");
@@ -2864,6 +2866,7 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
     case MCCFIInstruction::OpAdjustCfaOffset:
     case MCCFIInstruction::OpWindowSave:
     case MCCFIInstruction::OpNegateRAState:
+    case MCCFIInstruction::OpNegateRAStateWithPC:
     case MCCFIInstruction::OpLLVMDefAspaceCfa:
     case MCCFIInstruction::OpLabel:
       llvm_unreachable("unsupported CFI opcode");
diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test
index 3242ba22f591642..cf745ca7bf7b62a 100644
--- a/bolt/test/X86/pre-aggregated-perf.test
+++ b/bolt/test/X86/pre-aggregated-perf.test
@@ -11,6 +11,7 @@ REQUIRES: system-linux
 
 RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
 RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
+RUN:   --show-density \
 RUN:   --profile-density-threshold=9 --profile-density-cutoff-hot=970000 \
 RUN:   --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B
 
@@ -18,6 +19,7 @@ CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty
 CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.
 
 RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
+RUN:   --show-density \
 RUN:   --profile-density-cutoff-hot=970000 \
 RUN:   --profile-use-dfs 2>&1 | FileCheck %s --check-prefix=CHECK-WARNING
 
diff --git a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
index 9120c4b6c0d9ae9..33ac65e715ce811 100644
--- a/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
@@ -49,6 +49,7 @@
 #include "MultipleStatementMacroCheck.h"
 #include "NoEscapeCheck.h"
 #include "NonZeroEnumToBoolConversionCheck.h"
+#include "NondeterministicPointerIterationOrderCheck.h"
 #include "NotNullTerminatedResultCheck.h"
 #include "OptionalValueConversionCheck.h"
 #include "ParentVirtualCallCheck.h"
@@ -174,6 +175,8 @@ class BugproneModule : public ClangTidyModule {
         "bugprone-multiple-new-in-one-expression");
     CheckFactories.registerCheck<MultipleStatementMacroCheck>(
         "bugprone-multiple-statement-macro");
+    CheckFactories.registerCheck<NondeterministicPointerIterationOrderCheck>(
+        "bugprone-nondeterministic-pointer-iteration-order");
     CheckFactories.registerCheck<OptionalValueConversionCheck>(
         "bugprone-optional-value-conversion");
     CheckFactories.registerCheck<PointerArithmeticOnPolymorphicObjectCheck>(
diff --git a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
index f0667bbfdd87f7f..b0a2318acc05970 100644
--- a/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
@@ -45,6 +45,7 @@ add_clang_library(clangTidyBugproneModule STATIC
   MultipleNewInOneExpressionCheck.cpp
   MultipleStatementMacroCheck.cpp
   NoEscapeCheck.cpp
+  NondeterministicPointerIterationOrderCheck.cpp
   NonZeroEnumToBoolConversionCheck.cpp
   NotNullTerminatedResultCheck.cpp
   OptionalValueConversionCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp
new file mode 100644
index 000000000000000..22ecd689614696f
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.cpp
@@ -0,0 +1,78 @@
+//===----- NondeterministicPointerIterationOrderCheck.cpp - clang-tidy ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "NondeterministicPointerIterationOrderCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/Lex/Lexer.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang::tidy::bugprone {
+
+void NondeterministicPointerIterationOrderCheck::registerMatchers(
+    MatchFinder *Finder) {
+
+  auto LoopVariable = varDecl(hasType(
+      qualType(hasCanonicalType(anyOf(referenceType(), pointerType())))));
+
+  auto RangeInit = declRefExpr(to(varDecl(
+      hasType(recordDecl(hasAnyName("std::unordered_set", "std::unordered_map",
+                                    "std::unordered_multiset",
+                                    "std::unordered_multimap"))
+                  .bind("recorddecl")))));
+
+  Finder->addMatcher(cxxForRangeStmt(hasLoopVariable(LoopVariable),
+                                     hasRangeInit(RangeInit.bind("rangeinit")))
+                         .bind("cxxForRangeStmt"),
+                     this);
+
+  auto SortFuncM = callee(functionDecl(hasAnyName(
+      "std::is_sorted", "std::nth_element", "std::sort", "std::partial_sort",
+      "std::partition", "std::stable_partition", "std::stable_sort")));
+
+  auto IteratesPointerEltsM = hasArgument(
+      0,
+      cxxMemberCallExpr(on(hasType(cxxRecordDecl(has(fieldDecl(hasType(qualType(
+          hasCanonicalType(pointsTo(hasCanonicalType(pointerType()))))))))))));
+
+  Finder->addMatcher(
+      callExpr(allOf(SortFuncM, IteratesPointerEltsM)).bind("sortsemantic"),
+      this);
+}
+
+void NondeterministicPointerIterationOrderCheck::check(
+    const MatchFinder::MatchResult &Result) {
+  const auto *ForRangePointers =
+      Result.Nodes.getNodeAs<CXXForRangeStmt>("cxxForRangeStmt");
+
+  if ((ForRangePointers) && !(ForRangePointers->getBeginLoc().isMacroID())) {
+    const auto *RangeInit = Result.Nodes.getNodeAs<Stmt>("rangeinit");
+    if (const auto *ClassTemplate =
+            Result.Nodes.getNodeAs<ClassTemplateSpecializationDecl>(
+                "recorddecl")) {
+      const TemplateArgumentList &TemplateArgs =
+          ClassTemplate->getTemplateArgs();
+      const bool IsAlgoArgPointer =
+          TemplateArgs[0].getAsType()->isPointerType();
+
+      if (IsAlgoArgPointer) {
+        SourceRange R = RangeInit->getSourceRange();
+        diag(R.getBegin(), "iteration of pointers is nondeterministic") << R;
+      }
+    }
+    return;
+  }
+  const auto *SortPointers = Result.Nodes.getNodeAs<Stmt>("sortsemantic");
+
+  if ((SortPointers) && !(SortPointers->getBeginLoc().isMacroID())) {
+    SourceRange R = SortPointers->getSourceRange();
+    diag(R.getBegin(), "sorting pointers is nondeterministic") << R;
+  }
+}
+
+} // namespace clang::tidy::bugprone
diff --git a/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.h b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.h
new file mode 100644
index 000000000000000..698872fefca9042
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/bugprone/NondeterministicPointerIterationOrderCheck.h
@@ -0,0 +1,39 @@
+//=== NondeterministicPointerIterationOrderCheck.h - clang-tidy -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NONDETERMINISTIC_POINTER_ITERATION_ORDER_CHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NONDETERMINISTIC_POINTER_ITERATION_ORDER_CHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang::tidy::bugprone {
+
+/// Finds nondeterministic usages of pointers in unordered containers. The
+/// check also finds calls to sorting-like algorithms on a container of
+/// pointers.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/bugprone/nondeterministic-pointer-iteration-order.html
+class NondeterministicPointerIterationOrderCheck : public ClangTidyCheck {
+public:
+  NondeterministicPointerIterationOrderCheck(StringRef Name,
+                                             ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context) {}
+  bool isLanguageVersionSupported(const LangOptions &LangOpts) const override {
+    return LangOpts.CPlusPlus;
+  }
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+  std::optional<TraversalKind> getCheckTraversalKind() const override {
+    return TK_IgnoreUnlessSpelledInSource;
+  }
+};
+
+} // namespace clang::tidy::bugprone
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_NONDETERMINISTIC_POINTER_ITERATION_ORDER_CHECK_H
diff --git a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp
index 2a0cc403b726e8d..3132067f3d5ece6 100644
--- a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp
+++ b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp
@@ -80,9 +80,13 @@ unsigned getNumberOfDesignated(const InitListExpr *SyntacticInitList) {
   });
 }
 
-AST_MATCHER(CXXRecordDecl, isAggregate) { return Node.isAggregate(); }
+AST_MATCHER(CXXRecordDecl, isAggregate) {
+  return Node.hasDefinition() && Node.isAggregate();
+}
 
-AST_MATCHER(CXXRecordDecl, isPOD) { return Node.isPOD(); }
+AST_MATCHER(CXXRecordDecl, isPOD) {
+  return Node.hasDefinition() && Node.isPOD();
+}
 
 AST_MATCHER(InitListExpr, isFullyDesignated) {
   if (const InitListExpr *SyntacticForm =
diff --git a/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp b/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp
index 1cb95c2b2347b76..e0b9939681794f7 100644
--- a/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp
@@ -123,6 +123,13 @@ AST_MATCHER(EnumDecl, hasSequentialInitialValues) {
   return !AllEnumeratorsArePowersOfTwo;
 }
 
+std::string getName(const EnumDecl *Decl) {
+  if (!Decl->getDeclName())
+    return "<unnamed>";
+
+  return Decl->getQualifiedNameAsString();
+}
+
 } // namespace
 
 EnumInitialValueCheck::EnumInitialValueCheck(StringRef Name,
@@ -160,10 +167,11 @@ void EnumInitialValueCheck::registerMatchers(MatchFinder *Finder) {
 void EnumInitialValueCheck::check(const MatchFinder::MatchResult &Result) {
   if (const auto *Enum = Result.Nodes.getNodeAs<EnumDecl>("inconsistent")) {
     DiagnosticBuilder Diag =
-        diag(Enum->getBeginLoc(),
-             "initial values in enum %0 are not consistent, consider explicit "
-             "initialization of all, none or only the first enumerator")
-        << Enum;
+        diag(
+            Enum->getBeginLoc(),
+            "initial values in enum '%0' are not consistent, consider explicit "
+            "initialization of all, none or only the first enumerator")
+        << getName(Enum);
     for (const EnumConstantDecl *ECD : Enum->enumerators())
       if (ECD->getInitExpr() == nullptr) {
         const SourceLocation EndLoc = Lexer::getLocForEndOfToken(
@@ -183,16 +191,16 @@ void EnumInitialValueCheck::check(const MatchFinder::MatchResult &Result) {
     if (Loc.isInvalid() || Loc.isMacroID())
       return;
     DiagnosticBuilder Diag = diag(Loc, "zero initial value for the first "
-                                       "enumerator in %0 can be disregarded")
-                             << Enum;
+                                       "enumerator in '%0' can be disregarded")
+                             << getName(Enum);
     cleanInitialValue(Diag, ECD, *Result.SourceManager, getLangOpts());
     return;
   }
   if (const auto *Enum = Result.Nodes.getNodeAs<EnumDecl>("sequential")) {
     DiagnosticBuilder Diag =
         diag(Enum->getBeginLoc(),
-             "sequential initial value in %0 can be ignored")
-        << Enum;
+             "sequential initial value in '%0' can be ignored")
+        << getName(Enum);
     for (const EnumConstantDecl *ECD : llvm::drop_begin(Enum->enumerators()))
       cleanInitialValue(Diag, ECD, *Result.SourceManager, getLangOpts());
     return;
diff --git a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp
index 968a4a55a6d7988..f9fd1d903e231e1 100644
--- a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp
@@ -10,6 +10,7 @@
 #include "../utils/FixItHintUtils.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Tooling/FixIt.h"
 #include <queue>
@@ -26,6 +27,8 @@ AST_MATCHER(Stmt, isMacroExpansion) {
   return SM.isMacroBodyExpansion(Loc) || SM.isMacroArgExpansion(Loc);
 }
 
+AST_MATCHER(Stmt, isC23) { return Finder->getASTContext().getLangOpts().C23; }
+
 bool isNULLMacroExpansion(const Stmt *Statement, ASTContext &Context) {
   SourceManager &SM = Context.getSourceManager();
   const LangOptions &LO = Context.getLangOpts();
@@ -298,6 +301,11 @@ void ImplicitBoolConversionCheck::registerMatchers(MatchFinder *Finder) {
                          hasCastKind(CK_FloatingToBoolean),
                          hasCastKind(CK_PointerToBoolean),
                          hasCastKind(CK_MemberPointerToBoolean)),
+                   // Exclude cases of C23 comparison result.
+                   unless(allOf(isC23(),
+                                hasSourceExpression(ignoringParens(
+                                    binaryOperator(hasAnyOperatorName(
+                                        ">", ">=", "==", "!=", "<", "<=")))))),
                    // Exclude case of using if or while statements with variable
                    // declaration, e.g.:
                    //   if (int var = functionCall()) {}
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 876689c40fcdb21..ccebf74e8a67e73 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -119,6 +119,12 @@ New checks
   Warns about code that tries to cast between pointers by means of
   ``std::bit_cast`` or ``memcpy``.
 
+- New :doc:`bugprone-nondeterministic-pointer-iteration-order
+  <clang-tidy/checks/bugprone/nondeterministic-pointer-iteration-order>`
+  check.
+
+  Finds nondeterministic usages of pointers in unordered containers.
+
 - New :doc:`bugprone-tagged-union-member-count
   <clang-tidy/checks/bugprone/tagged-union-member-count>` check.
 
@@ -210,6 +216,10 @@ Changes in existing checks
   a false positive when only an implicit conversion happened inside an
   initializer list.
 
+- Improved :doc:`modernize-use-designated-initializers
+  <clang-tidy/checks/modernize/use-designated-initializers>` check to fix a
+  crash when a class is declared but not defined.
+
 - Improved :doc:`modernize-use-nullptr
   <clang-tidy/checks/modernize/use-nullptr>` check to also recognize
   ``NULL``/``__null`` (but not ``0``) when used with a templated type.
@@ -243,13 +253,15 @@ Changes in existing checks
 
 - Improved :doc:`readability-enum-initial-value
   <clang-tidy/checks/readability/enum-initial-value>` check by only issuing
-  diagnostics for the definition of an ``enum``, and by fixing a typo in the
+  diagnostics for the definition of an ``enum``, by not emitting a redundant
+  file path for anonymous enums in the diagnostic, and by fixing a typo in the
   diagnostic.
 
 - Improved :doc:`readability-implicit-bool-conversion
   <clang-tidy/checks/readability/implicit-bool-conversion>` check
   by adding the option `UseUpperCaseLiteralSuffix` to select the
-  case of the literal suffix in fixes.
+  case of the literal suffix in fixes and fixing a false positive for implicit
+  conversion of comparison result in C23.
 
 - Improved :doc:`readability-redundant-smartptr-get
   <clang-tidy/checks/readability/redundant-smartptr-get>` check to
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/nondeterministic-pointer-iteration-order.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/nondeterministic-pointer-iteration-order.rst
new file mode 100644
index 000000000000000..41be0bf1c677ec5
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/nondeterministic-pointer-iteration-order.rst
@@ -0,0 +1,44 @@
+.. title:: clang-tidy - bugprone-nondeterministic-pointer-iteration-order
+
+bugprone-nondeterministic-pointer-iteration-order
+=================================================
+
+Finds nondeterministic usages of pointers in unordered containers.
+
+One canonical example is iteration across a container of pointers.
+
+.. code-block:: c++
+
+  {
+    int a = 1, b = 2;
+    std::unordered_set<int *> UnorderedPtrSet = {&a, &b};
+    for (auto i : UnorderedPtrSet)
+      f(i);
+  }
+
+Another such example is sorting a container of pointers.
+
+.. code-block:: c++
+
+  {
+    int a = 1, b = 2;
+    std::vector<int *> VectorOfPtr = {&a, &b};
+    std::sort(VectorOfPtr.begin(), VectorOfPtr.end());
+  }
+
+Iteration of a container of pointers may present the order of different
+pointers differently across different runs of a program. In some cases this
+may be acceptable behavior, in others this may be unexpected behavior. This
+check is advisory for this reason.
+
+This check only detects range-based for loops over unordered sets and maps. It
+also detects calls to sorting-like algorithms on containers holding pointers.
+Other similar usages will not be found and are false negatives.
+
+Limitations:
+
+* This check currently does not check if a nondeterministic iteration order is
+  likely to be a mistake, and instead marks all such iterations as bugprone.
+* ``std::reference_wrapper`` is not considered yet.
+* Only for loops are considered; other iterators can be included in future
+  improvements.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 0082234f5ed31bd..d731b13fc0df446 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -115,6 +115,7 @@ Clang-Tidy Checks
    :doc:`bugprone-multiple-new-in-one-expression <bugprone/multiple-new-in-one-expression>`,
    :doc:`bugprone-multiple-statement-macro <bugprone/multiple-statement-macro>`,
    :doc:`bugprone-no-escape <bugprone/no-escape>`,
+   :doc:`bugprone-nondeterministic-pointer-iteration-order <bugprone/nondeterministic-pointer-iteration-order>`,
    :doc:`bugprone-non-zero-enum-to-bool-conversion <bugprone/non-zero-enum-to-bool-conversion>`,
    :doc:`bugprone-not-null-terminated-result <bugprone/not-null-terminated-result>`, "Yes"
    :doc:`bugprone-optional-value-conversion <bugprone/optional-value-conversion>`, "Yes"
diff --git a/clang-tools-extra/modularize/CoverageChecker.cpp b/clang-tools-extra/modularize/CoverageChecker.cpp
index 0e76c539aa3c839..b536ee00497c03f 100644
--- a/clang-tools-extra/modularize/CoverageChecker.cpp
+++ b/clang-tools-extra/modularize/CoverageChecker.cpp
@@ -223,10 +223,9 @@ bool CoverageChecker::collectModuleHeaders(const Module &Mod) {
       return false;
   }
 
-  for (auto &HeaderKind : Mod.Headers)
-    for (auto &Header : HeaderKind)
-      ModuleMapHeadersSet.insert(
-          ModularizeUtilities::getCanonicalPath(Header.Entry.getName()));
+  for (const auto &Header : Mod.getAllHeaders())
+    ModuleMapHeadersSet.insert(
+        ModularizeUtilities::getCanonicalPath(Header.Entry.getName()));
 
   for (auto *Submodule : Mod.submodules())
     collectModuleHeaders(*Submodule);
diff --git a/clang-tools-extra/modularize/ModularizeUtilities.cpp b/clang-tools-extra/modularize/ModularizeUtilities.cpp
index b202b3aae8f8a3a..476e13770a94f6c 100644
--- a/clang-tools-extra/modularize/ModularizeUtilities.cpp
+++ b/clang-tools-extra/modularize/ModularizeUtilities.cpp
@@ -358,7 +358,7 @@ bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
   } else if (std::optional<clang::Module::DirectoryName> UmbrellaDir =
                  Mod.getUmbrellaDirAsWritten()) {
     // If there normal headers, assume these are umbrellas and skip collection.
-    if (Mod.Headers->size() == 0) {
+    if (Mod.getHeaders(Module::HK_Normal).empty()) {
       // Collect headers in umbrella directory.
       if (!collectUmbrellaHeaders(UmbrellaDir->Entry.getName(),
                                   UmbrellaDependents))
@@ -371,16 +371,8 @@ bool ModularizeUtilities::collectModuleHeaders(const clang::Module &Mod) {
   // modules or because they are meant to be included by another header,
   // and thus should be ignored by modularize.
 
-  int NormalHeaderCount = Mod.Headers[clang::Module::HK_Normal].size();
-
-  for (int Index = 0; Index < NormalHeaderCount; ++Index) {
-    DependentsVector NormalDependents;
-    // Collect normal header.
-    const clang::Module::Header &Header(
-      Mod.Headers[clang::Module::HK_Normal][Index]);
-    std::string HeaderPath = getCanonicalPath(Header.Entry.getName());
-    HeaderFileNames.push_back(HeaderPath);
-  }
+  for (const auto &Header : Mod.getHeaders(clang::Module::HK_Normal))
+    HeaderFileNames.push_back(getCanonicalPath(Header.Entry.getName()));
 
   int MissingCountThisModule = Mod.MissingHeaders.size();
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_algorithm b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_algorithm
new file mode 100644
index 000000000000000..6dbca55a8e365ff
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_algorithm
@@ -0,0 +1,31 @@
+#ifndef _SIM_ALGORITHM
+#define _SIM_ALGORITHM
+
+#pragma clang system_header
+
+namespace std {
+
+template<class ForwardIt>
+bool is_sorted(ForwardIt first, ForwardIt last);
+
+template <class RandomIt>
+void nth_element(RandomIt first, RandomIt nth, RandomIt last);
+
+template<class RandomIt>
+void partial_sort(RandomIt first, RandomIt middle, RandomIt last);
+
+template<class RandomIt>
+void sort (RandomIt first, RandomIt last);
+
+template<class RandomIt>
+void stable_sort(RandomIt first, RandomIt last);
+
+template<class BidirIt, class UnaryPredicate>
+BidirIt partition(BidirIt first, BidirIt last, UnaryPredicate p);
+
+template<class BidirIt, class UnaryPredicate>
+BidirIt stable_partition(BidirIt first, BidirIt last, UnaryPredicate p);
+
+} // namespace std
+
+#endif // _SIM_ALGORITHM
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_c++config.h b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_c++config.h
new file mode 100644
index 000000000000000..ba98e0cc2208bab
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_c++config.h
@@ -0,0 +1,11 @@
+#ifndef _SIM_CPP_CONFIG_H
+#define _SIM_CPP_CONFIG_H
+
+#pragma clang system_header
+
+typedef unsigned char uint8_t;
+
+typedef __typeof__(sizeof(int)) size_t;
+typedef __typeof__((char*)0-(char*)0) ptrdiff_t;
+
+#endif // _SIM_CPP_CONFIG_H
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_initializer_list b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_initializer_list
new file mode 100644
index 000000000000000..e4d9d534b3bd78c
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_initializer_list
@@ -0,0 +1,39 @@
+#ifndef _INITIALIZER_LIST
+#define _INITIALIZER_LIST
+
+#pragma clang system_header
+#
+#include "sim_c++config.h" // size_t
+
+namespace std {
+
+template <class _E>
+class initializer_list  {
+  const _E* __begin_;
+  size_t    __size_;
+
+  initializer_list(const _E* __b, size_t __s)
+    : __begin_(__b),
+      __size_(__s)
+  {}
+
+public:
+  typedef _E        value_type;
+  typedef const _E& reference;
+  typedef const _E& const_reference;
+  typedef size_t    size_type;
+
+  typedef const _E* iterator;
+  typedef const _E* const_iterator;
+
+  initializer_list() : __begin_(0), __size_(0) {}
+
+  size_t    size()  const {return __size_;}
+  const _E* begin() const {return __begin_;}
+  const _E* end()   const {return __begin_ + __size_;}
+
+}; // class initializer_list
+
+} // namespace std
+
+#endif // _INITIALIZER_LIST
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_iterator_base b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_iterator_base
new file mode 100644
index 000000000000000..3b205d1722c9ddc
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_iterator_base
@@ -0,0 +1,22 @@
+#ifndef _SIM_ITERATOR_BASE
+#define _SIM_ITERATOR_BASE
+
+namespace std {
+
+struct input_iterator_tag { };
+struct output_iterator_tag { };
+struct forward_iterator_tag : public input_iterator_tag { };
+struct bidirectional_iterator_tag : public forward_iterator_tag { };
+struct random_access_iterator_tag : public bidirectional_iterator_tag { };
+
+template <typename Iterator> struct iterator_traits {
+  typedef typename Iterator::difference_type difference_type;
+  typedef typename Iterator::value_type value_type;
+  typedef typename Iterator::pointer pointer;
+  typedef typename Iterator::reference reference;
+  typedef typename Iterator::iterator_category iterator_category;
+};
+
+} // namespace std
+
+#endif // _SIM_ITERATOR_BASE
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_map b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_map
new file mode 100644
index 000000000000000..8c57f5c71f8814a
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_map
@@ -0,0 +1,34 @@
+
+#ifndef _SIM_MAP
+#define _SIM_MAP
+
+#pragma clang system_header
+#include "sim_stl_pair"
+
+namespace std {
+
+template <typename Key, typename Value>
+class map {
+  public:
+    using value_type = pair<Key, Value>;
+    map();
+    map(initializer_list<pair<Key, Value>> initList);
+    value_type& operator[](const Key& key);
+    value_type& operator[](Key&& key);
+    class iterator {
+    public:
+      iterator(Key *key): ptr(key) {}
+      iterator& operator++() { ++ptr; return *this; }
+      bool operator!=(const iterator &other) const { return ptr != other.ptr; }
+      const Key &operator*() const { return *ptr; }
+    private:
+      Key *ptr;
+    };
+    Key *val;
+    iterator begin() const { return iterator(val); }
+    iterator end() const { return iterator(val + 1); }
+};
+
+} // namespace std
+
+#endif // _SIM_MAP
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_set b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_set
new file mode 100644
index 000000000000000..f2f70095538925b
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_set
@@ -0,0 +1,44 @@
+
+#ifndef _SIM_SET
+#define _SIM_SET
+
+#pragma clang system_header
+#include "sim_initializer_list"
+
+namespace std {
+
+template< class T = void >
+struct less;
+
+template< class T >
+struct allocator;
+
+template< class Key >
+struct hash;
+
+template<
+  class Key,
+  class Compare = std::less<Key>,
+  class Alloc = std::allocator<Key>
+> class set {
+  public:
+    set(initializer_list<Key> __list) {}
+
+    class iterator {
+    public:
+      iterator(Key *key): ptr(key) {}
+      iterator& operator++() { ++ptr; return *this; }
+      bool operator!=(const iterator &other) const { return ptr != other.ptr; }
+      const Key &operator*() const { return *ptr; }
+    private:
+      Key *ptr;
+    };
+
+    Key *val;
+    iterator begin() const { return iterator(val); }
+    iterator end() const { return iterator(val + 1); }
+};
+
+} // namespace std
+
+#endif // _SIM_SET
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_stl_pair b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_stl_pair
new file mode 100644
index 000000000000000..d244bb363b861a0
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_stl_pair
@@ -0,0 +1,32 @@
+#ifndef _SIM_STL_PAIR
+#define _SIM_STL_PAIR
+
+#pragma clang system_header
+
+#include "sim_type_traits"
+
+namespace std {
+
+template <class T1, class T2>
+struct pair {
+  T1 first;
+  T2 second;
+
+  pair() : first(), second() {}
+  pair(const T1 &a, const T2 &b) : first(a), second(b) {}
+
+  template<class U1, class U2>
+  pair(const pair<U1, U2> &other) : first(other.first),
+                                      second(other.second) {}
+};
+
+template <typename T1, typename T2>
+pair<typename remove_reference<T1>::type, typename remove_reference<T2>::type>
+make_pair(T1 &&, T2 &&) {
+  return {};
+};
+
+} // namespace std
+
+#endif // _SIM_STL_PAIR
+
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_type_traits b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_type_traits
new file mode 100644
index 000000000000000..f066767c4d98589
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_type_traits
@@ -0,0 +1,19 @@
+
+#ifndef _SIM_TYPE_TRAITS
+#define _SIM_TYPE_TRAITS
+
+#pragma clang system_header
+namespace std {
+
+template< class T > struct remove_reference      {typedef T type;};
+template< class T > struct remove_reference<T&>  {typedef T type;};
+template< class T > struct remove_reference<T&&> {typedef T type;};
+
+template<typename T> typename remove_reference<T>::type&& move(T&& a);
+
+template< class T >
+using remove_reference_t = typename remove_reference<T>::type;
+
+} // namespace std
+
+#endif // _SIM_TYPE_TRAITS
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_map b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_map
new file mode 100644
index 000000000000000..fabd8e7fd2d7486
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_map
@@ -0,0 +1,33 @@
+#ifndef _SIM_UNORDERED_MAP
+#define _SIM_UNORDERED_MAP
+
+#pragma clang system_header
+#include "sim_initializer_list"
+
+namespace std {
+
+template <typename Key, typename Value>
+class unordered_map { // Minimal stand-in for std::unordered_map; most members are declarations only.
+public:
+  using value_type = pair<Key, Value>; // NOTE(review): `pair` is not declared by the include above — presumably supplied by an earlier include; confirm.
+  unordered_map();
+  unordered_map(initializer_list<pair<Key, Value>> initList);
+  value_type& operator[](const Key& key); // NOTE(review): std::unordered_map::operator[] returns the mapped type, not value_type — confirm this is intentional.
+  value_type& operator[](Key&& key);
+  class iterator { // Wraps a Key pointer; dereferencing yields the key only.
+  public:
+    iterator(Key *key): ptr(key) {}
+    iterator& operator++() { ++ptr; return *this; }
+    bool operator!=(const iterator &other) const { return ptr != other.ptr; }
+    const Key &operator*() const { return *ptr; }
+  private:
+    Key *ptr;
+  };
+  Key *val; // Pointer that begin()/end() wrap.
+  iterator begin() const { return iterator(val); }
+  iterator end() const { return iterator(val + 1); } // One past `val`, so [begin, end) spans a single element.
+};
+
+} // namespace std
+
+#endif // _SIM_UNORDERED_MAP
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_set b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_set
new file mode 100644
index 000000000000000..a077507bbdcbcb1
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_unordered_set
@@ -0,0 +1,35 @@
+#ifndef _SIM_UNORDERED_SET
+#define _SIM_UNORDERED_SET
+
+#pragma clang system_header
+#include "sim_initializer_list"
+
+namespace std {
+
+template<
+  class Key,
+  class Hash = std::hash<Key>, // NOTE(review): std::hash/std::less/std::allocator are not declared by the include above — presumably supplied by an earlier include; confirm.
+  class Compare = std::less<Key>, // NOTE(review): std::unordered_set takes a KeyEqual parameter here, not an ordering — confirm this is intentional.
+  class Alloc = std::allocator<Key>
+> class unordered_set { // Minimal stand-in for std::unordered_set.
+  public:
+    unordered_set(initializer_list<Key> __list) {} // Ignores the list; `val` is not set here.
+
+    class iterator { // Wraps a Key pointer.
+    public:
+      iterator(Key *key): ptr(key) {}
+      iterator& operator++() { ++ptr; return *this; }
+      bool operator!=(const iterator &other) const { return ptr != other.ptr; }
+      const Key &operator*() const { return *ptr; }
+    private:
+      Key *ptr;
+    };
+
+    Key *val; // Pointer that begin()/end() wrap.
+    iterator begin() const { return iterator(val); }
+    iterator end() const { return iterator(val + 1); } // One past `val`, so [begin, end) spans a single element.
+};
+
+} // namespace std
+
+#endif // _SIM_UNORDERED_SET
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_vector b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_vector
new file mode 100644
index 000000000000000..dfa9abfb8863ecc
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/Inputs/system-header-simulator/sim_vector
@@ -0,0 +1,150 @@
+#ifndef _SIM_VECTOR
+#define _SIM_VECTOR
+
+#pragma clang system_header
+
+#include "sim_iterator_base"
+
+namespace std {
+
+template <typename T, typename Ptr, typename Ref> struct __vector_iterator { // Pointer-backed iterator for the simulated vector.
+  typedef __vector_iterator<T, T *, T &> iterator;
+  typedef __vector_iterator<T, const T *, const T &> const_iterator;
+
+  typedef ptrdiff_t difference_type;
+  typedef T value_type;
+  typedef Ptr pointer;
+  typedef Ref reference;
+  typedef std::random_access_iterator_tag iterator_category;
+
+  __vector_iterator(const Ptr p = 0) : ptr(p) {} // Implicit on purpose: the arithmetic operators return raw pointers.
+  __vector_iterator(const iterator &rhs): ptr(rhs.base()) {} // Allows iterator -> const_iterator conversion.
+  __vector_iterator<T, Ptr, Ref>& operator++() { ++ ptr; return *this; }
+  __vector_iterator<T, Ptr, Ref> operator++(int) {
+    auto tmp = *this;
+    ++ ptr;
+    return tmp;
+  }
+  __vector_iterator<T, Ptr, Ref>& operator--() { -- ptr; return *this; } // Returns *this, matching operator++().
+  __vector_iterator<T, Ptr, Ref> operator--(int) {
+    auto tmp = *this; -- ptr;
+    return tmp;
+  }
+  __vector_iterator<T, Ptr, Ref> operator+(difference_type n) const {
+    return ptr + n;
+  }
+  friend __vector_iterator<T, Ptr, Ref> operator+(
+      difference_type n,
+      const __vector_iterator<T, Ptr, Ref> &iter) {
+    return n + iter.ptr;
+  }
+  __vector_iterator<T, Ptr, Ref> operator-(difference_type n) const {
+    return ptr - n;
+  }
+  __vector_iterator<T, Ptr, Ref>& operator+=(difference_type n) {
+    ptr += n; return *this; // Compound assignment yields the iterator itself, not a copy.
+  }
+  __vector_iterator<T, Ptr, Ref>& operator-=(difference_type n) {
+    ptr -= n; return *this; // Compound assignment yields the iterator itself, not a copy.
+  }
+
+  template<typename U, typename Ptr2, typename Ref2> // Iterator difference: declared only, not defined here.
+  difference_type operator-(const __vector_iterator<U, Ptr2, Ref2> &rhs);
+
+  Ref operator*() const { return *ptr; }
+  Ptr operator->() const { return ptr; }
+
+  Ref operator[](difference_type n) const {
+    return *(ptr+n);
+  }
+
+  bool operator==(const iterator &rhs) const { return ptr == rhs.base(); } // base(): rhs may be another specialization whose ptr is private.
+  bool operator==(const const_iterator &rhs) const { return ptr == rhs.base(); }
+
+  bool operator!=(const iterator &rhs) const { return ptr != rhs.base(); }
+  bool operator!=(const const_iterator &rhs) const { return ptr != rhs.base(); }
+
+  const Ptr& base() const { return ptr; } // Read-only access to the wrapped pointer.
+
+private:
+  Ptr ptr;
+};
+
+template<typename T>
+class vector { // Minimal stand-in for std::vector; most members are declarations only.
+  T *_start; // First element.
+  T *_finish; // One past the last element (see size(), end()).
+  T *_end_of_storage;
+
+public:
+  typedef T value_type;
+  typedef size_t size_type;
+  typedef __vector_iterator<T, T *, T &> iterator;
+  typedef __vector_iterator<T, const T *, const T &> const_iterator;
+
+  vector() : _start(0), _finish(0), _end_of_storage(0) {} // Empty vector: all three pointers are null.
+  template <typename InputIterator>
+  vector(InputIterator first, InputIterator last);
+  vector(const vector &other);
+  vector(vector &&other);
+  ~vector();
+
+  size_t size() const {
+    return size_t(_finish - _start);
+  }
+
+  vector& operator=(const vector &other);
+  vector& operator=(vector &&other);
+  vector& operator=(std::initializer_list<T> ilist);
+
+  void assign(size_type count, const T &value);
+  template <typename InputIterator >
+  void assign(InputIterator first, InputIterator last);
+  void assign(std::initializer_list<T> ilist);
+
+  void clear();
+
+  void push_back(const T &value);
+  void push_back(T &&value);
+  template<class... Args>
+  void emplace_back(Args&&... args);
+  void pop_back();
+
+  iterator insert(const_iterator position, const value_type &val);
+  iterator insert(const_iterator position, size_type n,
+                  const value_type &val);
+  template <typename InputIterator>
+  iterator insert(const_iterator position, InputIterator first,
+                  InputIterator last);
+  iterator insert(const_iterator position, value_type &&val);
+  iterator insert(const_iterator position, initializer_list<value_type> il);
+
+  template <class... Args>
+  iterator emplace(const_iterator position, Args&&... args);
+
+  iterator erase(const_iterator position);
+  iterator erase(const_iterator first, const_iterator last);
+
+  T &operator[](size_t n) { // Unchecked indexing from _start.
+    return _start[n];
+  }
+
+  const T &operator[](size_t n) const {
+    return _start[n];
+  }
+
+  iterator begin() { return iterator(_start); }
+  const_iterator begin() const { return const_iterator(_start); }
+  const_iterator cbegin() const { return const_iterator(_start); }
+  iterator end() { return iterator(_finish); }
+  const_iterator end() const { return const_iterator(_finish); }
+  const_iterator cend() const { return const_iterator(_finish); }
+  T& front() { return *begin(); }
+  const T& front() const { return *begin(); }
+  T& back() { return *(end() - 1); } // Last element: one before end().
+  const T& back() const { return *(end() - 1); }
+};
+
+} // namespace std
+
+#endif // _SIM_VECTOR
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/nondeterministic-pointer-iteration-order.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/nondeterministic-pointer-iteration-order.cpp
new file mode 100644
index 000000000000000..91853874d0afc91
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/nondeterministic-pointer-iteration-order.cpp
@@ -0,0 +1,84 @@
+// RUN: %check_clang_tidy %s bugprone-nondeterministic-pointer-iteration-order %t -- -- -I%S -std=c++14
+
+#include "Inputs/system-header-simulator/sim_set"
+#include "Inputs/system-header-simulator/sim_unordered_set"
+#include "Inputs/system-header-simulator/sim_map"
+#include "Inputs/system-header-simulator/sim_unordered_map"
+#include "Inputs/system-header-simulator/sim_vector"
+#include "Inputs/system-header-simulator/sim_algorithm"
+
+template<class T>
+void f(T x);
+
+void PointerIteration() { // Range-for over ordered/unordered containers of ints and of pointers.
+  int a = 1, b = 2;
+  std::set<int> OrderedIntSet = {a, b};
+  std::set<int *> OrderedPtrSet = {&a, &b};
+  std::unordered_set<int> UnorderedIntSet = {a, b};
+  std::unordered_set<int *> UnorderedPtrSet = {&a, &b};
+  std::map<int, int> IntMap = { std::make_pair(a,a), std::make_pair(b,b) };
+  std::map<int*, int*> PtrMap = { std::make_pair(&a,&a), std::make_pair(&b,&b) };
+  std::unordered_map<int, int> IntUnorderedMap = { std::make_pair(a,a), std::make_pair(b,b) };
+  std::unordered_map<int*, int*> PtrUnorderedMap = { std::make_pair(&a,&a), std::make_pair(&b,&b) };
+
+  for (auto i : OrderedIntSet) // no-warning
+    f(i);
+
+  for (auto i : OrderedPtrSet) // no-warning
+    f(i);
+
+  for (auto i : UnorderedIntSet) // no-warning
+    f(i);
+
+  for (auto i : UnorderedPtrSet)
+    f(i);
+  // CHECK-MESSAGES: :[[@LINE-2]]:17: warning: iteration of pointers is nondeterministic
+
+  for (auto &i : UnorderedPtrSet)
+    f(i);
+  // CHECK-MESSAGES: :[[@LINE-2]]:18: warning: iteration of pointers is nondeterministic
+
+  for (auto &i : IntMap) // no-warning
+    f(i);
+
+  for (auto &i : PtrMap) // no-warning
+    f(i);
+
+  for (auto &i : IntUnorderedMap) // no-warning
+    f(i);
+
+  for (auto &i : PtrUnorderedMap)
+    f(i);
+  // CHECK-MESSAGES: :[[@LINE-2]]:18: warning: iteration of pointers is nondeterministic
+}
+
+bool g (int *x) { return true; } // Always-true predicate over int*; passed to the partition calls on V2.
+bool h (int x) { return true; } // Always-true predicate over int; passed to the partition calls on V1.
+
+void PointerSorting() { // Same algorithms on a vector of ints (no warning) and a vector of pointers (warning).
+  int a = 1, b = 2, c = 3;
+  std::vector<int> V1 = {a, b};
+  std::vector<int *> V2 = {&a, &b};
+
+  std::is_sorted(V1.begin(), V1.end());                    // no-warning
+  std::nth_element(V1.begin(), V1.begin() + 1, V1.end());  // no-warning
+  std::partial_sort(V1.begin(), V1.begin() + 1, V1.end()); // no-warning
+  std::sort(V1.begin(), V1.end());                         // no-warning
+  std::stable_sort(V1.begin(), V1.end());                  // no-warning
+  std::partition(V1.begin(), V1.end(), h);                 // no-warning
+  std::stable_partition(V1.begin(), V1.end(), h);          // no-warning
+  std::is_sorted(V2.begin(), V2.end());
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic
+  std::nth_element(V2.begin(), V2.begin() + 1, V2.end());
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic
+  std::partial_sort(V2.begin(), V2.begin() + 1, V2.end());
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic
+  std::sort(V2.begin(), V2.end());
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic
+  std::stable_sort(V2.begin(), V2.end());
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic
+  std::partition(V2.begin(), V2.end(), g);
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic
+  std::stable_partition(V2.begin(), V2.end(), g);
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: sorting pointers is nondeterministic
+}
diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp
index 9b769ad0be23cab..048665b2e54ac5a 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp
@@ -201,3 +201,11 @@ DECLARE_S93;
 // CHECK-MESSAGES-MACROS: :[[@LINE-1]]:1: warning: use designated initializer list to initialize 'S9' [modernize-use-designated-initializers]
 // CHECK-MESSAGES-MACROS: :[[@LINE-4]]:28: note: expanded from macro 'DECLARE_S93'
 // CHECK-MESSAGES-MACROS: :[[@LINE-71]]:1: note: aggregate type is defined here
+
+// Issue #113652.
+struct S14;
+
+struct S15{
+  S15(S14& d):d{d}{}
+  S14& d;
+};
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.c b/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.c
index b9a34d0683d7f30..54108585f030f87 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.c
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.c
@@ -53,6 +53,17 @@ enum EMacro2 {
   // CHECK-FIXES: EMacro2_c = 3,
 };
 
+
+enum {
+  // CHECK-MESSAGES: :[[@LINE-1]]:1: warning: initial values in enum '<unnamed>' are not consistent
+  // CHECK-MESSAGES-ENABLE: :[[@LINE-2]]:1: warning: initial values in enum '<unnamed>' are not consistent
+  EAnonymous_a = 1,
+  EAnonymous_b,
+  // CHECK-FIXES: EAnonymous_b = 2,
+  EAnonymous_c = 3,
+};
+
+
 enum EnumZeroFirstInitialValue {
   EnumZeroFirstInitialValue_0 = 0,
   // CHECK-MESSAGES-ENABLE: :[[@LINE-1]]:3: warning: zero initial value for the first enumerator in 'EnumZeroFirstInitialValue' can be disregarded
@@ -114,4 +125,3 @@ enum WithFwdDeclSequential : int {
   EFS2 = 4,
   // CHECK-FIXES-ENABLE: EFS2 ,
 };
-
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.c b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.c
index f3dc32c10d640ae..0b231d10adf8fc6 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.c
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability/implicit-bool-conversion.c
@@ -304,6 +304,15 @@ void implicitConversionToBoolFromUnaryMinusAndZeroLiterals() {
   // CHECK-FIXES: functionTakingBool((-0.0) != 0.0);
 }
 
+void ignoreImplicitCastToBoolForComparisonResult() {
+  bool boolFromComparison0 = 1 != 0;
+  bool boolFromComparison1 = 1 == 0;
+  bool boolFromComparison2 = 1 > 0;
+  bool boolFromComparison3 = 1 >= 0;
+  bool boolFromComparison4 = 1 < 0;
+  bool boolFromComparison5 = 1 <= 0;
+}
+
 void ignoreExplicitCastsToBool() {
   int integer = 10;
   bool boolComingFromInt = (bool)integer;
diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst
index ee5334b02f7000a..35c218d8e0e8fa4 100644
--- a/clang/Maintainers.rst
+++ b/clang/Maintainers.rst
@@ -33,6 +33,12 @@ AST matchers
 | aaron\@aaronballman.com (email), aaron.ballman (Phabricator), AaronBallman (GitHub), AaronBallman (Discourse), aaronballman (Discord), AaronBallman (IRC)
 
 
+AST Visitors
+~~~~~~~~~~~~
+| Sirraide
+| aeternalmail\@gmail.com (email), Sirraide (GitHub), Ætérnal (Discord), Sirraide (Discourse)
+
+
 Clang LLVM IR generation
 ~~~~~~~~~~~~~~~~~~~~~~~~
 | John McCall
@@ -57,6 +63,12 @@ Analysis & CFG
 | sgatev\@google.com (email), sgatev (Phabricator), sgatev (GitHub)
 
 
+Sema
+~~~~
+| Sirraide
+| aeternalmail\@gmail.com (email), Sirraide (GitHub), Ætérnal (Discord), Sirraide (Discourse)
+
+
 Experimental new constant interpreter
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 | Timm Bäder
@@ -71,6 +83,9 @@ Modules & serialization
 | Michael Spencer
 | bigcheesegs\@gmail.com (email), Bigcheese (Phabricator), Bigcheese (GitHub)
 
+| Vassil Vassilev
+| Vassil.Vassilev\@cern.ch (email), v.g.vassilev (Phabricator), vgvassilev (GitHub)
+
 
 Templates
 ~~~~~~~~~
@@ -78,6 +93,12 @@ Templates
 | ekeane\@nvidia.com (email), ErichKeane (Phabricator), erichkeane (GitHub)
 
 
+Lambdas
+~~~~~~~
+| Corentin Jabot
+| corentin.jabot\@gmail.com (email), cor3ntin (Phabricator), cor3ntin (GitHub)
+
+
 Debug information
 ~~~~~~~~~~~~~~~~~
 | Adrian Prantl
@@ -172,6 +193,12 @@ Attributes
 | ekeane\@nvidia.com (email), ErichKeane (Phabricator), erichkeane (GitHub)
 
 
+Plugins
+~~~~~~~
+| Vassil Vassilev
+| Vassil.Vassilev\@cern.ch (email), v.g.vassilev (Phabricator), vgvassilev (GitHub)
+
+
 Inline assembly
 ~~~~~~~~~~~~~~~
 | Eric Christopher
@@ -225,6 +252,18 @@ C++ conformance
 | Hubert Tong
 | hubert.reinterpretcast\@gmail.com (email), hubert.reinterpretcast (Phabricator), hubert-reinterpretcast (GitHub)
 
+| Shafik Yaghmour
+| shafik.yaghmour\@intel.com (email), shafik (GitHub), shafik.yaghmour (Discord), shafik (Discourse)
+
+| Vlad Serebrennikov
+| serebrennikov.vladislav\@gmail.com (email), Endilll (GitHub), Endill (Discord), Endill (Discourse)
+
+
+C++ Defect Reports
+~~~~~~~~~~~~~~~~~~
+| Vlad Serebrennikov
+| serebrennikov.vladislav\@gmail.com (email), Endilll (GitHub), Endill (Discord), Endill (Discourse)
+
 
 Objective-C/C++ conformance
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -244,6 +283,12 @@ OpenCL conformance
 | anastasia\@compiler-experts.com (email), Anastasia (Phabricator), AnastasiaStulova (GitHub)
 
 
+OpenACC
+~~~~~~~
+| Erich Keane
+| ekeane\@nvidia.com (email), ErichKeane (Phabricator), erichkeane (GitHub)
+
+
 SYCL conformance
 ~~~~~~~~~~~~~~~~
 | Alexey Bader
diff --git a/clang/docs/AddressSanitizer.rst b/clang/docs/AddressSanitizer.rst
index 76fdf559950599c..d937cbfdf583c4b 100644
--- a/clang/docs/AddressSanitizer.rst
+++ b/clang/docs/AddressSanitizer.rst
@@ -26,7 +26,7 @@ Typical slowdown introduced by AddressSanitizer is **2x**.
 How to build
 ============
 
-Build LLVM/Clang with `CMake <https://llvm.org/docs/CMake.html>` and enable
+Build LLVM/Clang with `CMake <https://llvm.org/docs/CMake.html>`_ and enable
 the ``compiler-rt`` runtime. An example CMake configuration that will allow
 for the use/testing of AddressSanitizer:
 
diff --git a/clang/docs/FunctionEffectAnalysis.rst b/clang/docs/FunctionEffectAnalysis.rst
new file mode 100644
index 000000000000000..3f2c4db7bad0cb7
--- /dev/null
+++ b/clang/docs/FunctionEffectAnalysis.rst
@@ -0,0 +1,535 @@
+========================
+Function Effect Analysis
+========================
+
+.. contents::
+  :depth: 3
+  :local:
+
+
+Introduction
+============
+
+Clang Function Effect Analysis is a language extension which can warn about "unsafe"
+constructs. The feature is currently tailored for the Performance Constraint attributes
+``nonblocking`` and ``nonallocating``; functions with these attributes are verified as not
+containing any language constructs or calls to other functions which violate the constraint.
+(See :doc:`AttributeReference`.)
+
+
+The ``nonblocking`` and ``nonallocating`` attributes
+====================================================
+
+Attribute syntax
+----------------
+
+The ``nonblocking`` and ``nonallocating`` attributes apply to function types, allowing them to be
+attached to functions, blocks, function pointers, lambdas, and member functions.
+
+.. code-block:: c++
+
+  // Functions
+  void nonblockingFunction() [[clang::nonblocking]];
+  void nonallocatingFunction() [[clang::nonallocating]];
+
+  // Function pointers
+  void (*nonblockingFunctionPtr)() [[clang::nonblocking]];
+
+  // Typedefs, type aliases.
+  typedef void (*NBFunctionPtrTypedef)() [[clang::nonblocking]];
+  using NBFunctionPtrTypeAlias_gnu = __attribute__((nonblocking)) void (*)();
+  using NBFunctionPtrTypeAlias_std = void (*)() [[clang::nonblocking]];
+
+  // C++ methods
+  struct Struct {
+    void NBMethod() [[clang::nonblocking]];
+  };
+
+  // C++ lambdas
+  auto nbLambda = []() [[clang::nonblocking]] {};
+
+  // Blocks
+  void (^nbBlock)() = ^() [[clang::nonblocking]] {};
+
+The attribute applies only to the function itself. In particular, it does not apply to any nested
+functions or declarations, such as blocks, lambdas, and local classes.
+
+This document uses the C++/C23 syntax ``[[clang::nonblocking]]``, since it parallels the placement
+of the ``noexcept`` specifier, and the attributes have other similarities to ``noexcept``. The GNU
+``__attribute__((nonblocking))`` syntax is also supported. Note that it requires a different
+placement on a C++ type alias.
+
+Like ``noexcept``, ``nonblocking`` and ``nonallocating`` have an optional argument, a compile-time
+constant boolean expression. By default, the argument is ``true``, so ``[[clang::nonblocking]]``
+is equivalent to ``[[clang::nonblocking(true)]]``, and declares the function type as never blocking.
+
+
+Attribute semantics
+-------------------
+
+Together with ``noexcept``, the ``nonallocating`` and ``nonblocking`` attributes define an ordered
+series of performance constraints. From weakest to strongest:
+
+- ``noexcept`` (as per the C++ standard): The function type will never throw an exception.
+- ``nonallocating``: The function type will never allocate memory on the heap or throw an
+  exception.
+- ``nonblocking``: The function type will never block on a lock, allocate memory on the heap,
+  or throw an exception.
+
+``nonblocking`` includes the ``nonallocating`` guarantee.
+
+While ``nonblocking`` and ``nonallocating`` are conceptually a superset of ``noexcept``, neither
+attribute implicitly specifies ``noexcept``. Further, ``noexcept`` has a specified runtime behavior of
+aborting if an exception is thrown, while the ``nonallocating`` and ``nonblocking`` attributes are
+mainly for compile-time analysis and have no runtime behavior, except in code built
+with Clang's :doc:`RealtimeSanitizer`. Nonetheless, Clang emits a
+warning if, in C++, a function is declared ``nonblocking`` or ``nonallocating`` without
+``noexcept``. This diagnostic is controlled by ``-Wperf-constraint-implies-noexcept``.
+
+``nonblocking(true)`` and ``nonallocating(true)`` apply to function *types*, and by extension, to
+function-like declarations. When applied to a declaration with a body, the compiler verifies the
+function, as described in the section "Analysis and warnings", below.
+
+``blocking`` and ``allocating`` are synonyms for ``nonblocking(false)`` and
+``nonallocating(false)``, respectively. They can be used on a function-like declaration to
+explicitly disable any potential inference of ``nonblocking`` or ``nonallocating`` during
+verification. (Inference is described later in this document). ``nonblocking(false)`` and
+``nonallocating(false)`` are legal, but superfluous when applied to a function *type*
+that is not part of a declarator: ``float (int) [[nonblocking(false)]]`` and
+``float (int)`` are identical types.
+
+For functions with no explicit performance constraint, the worst is assumed: the function
+allocates memory and potentially blocks, unless it can be inferred otherwise. This is detailed in the
+discussion of verification.
+
+The following example describes the meanings of all permutations of the two attributes and arguments:
+
+.. code-block:: c++
+
+  void nb1_na1() [[clang::nonblocking(true)]] [[clang::nonallocating(true)]];
+  // Valid; nonallocating(true) is superfluous but doesn't contradict the guarantee.
+
+  void nb1_na0() [[clang::nonblocking(true)]] [[clang::nonallocating(false)]];
+  // error: 'allocating' and 'nonblocking' attributes are not compatible
+
+  void nb0_na1() [[clang::nonblocking(false)]] [[clang::nonallocating(true)]];
+  // Valid; the function does not allocate memory, but may lock for other reasons.
+
+  void nb0_na0() [[clang::nonblocking(false)]] [[clang::nonallocating(false)]];
+  // Valid.
+
+
+Type conversions
+----------------
+
+A performance constraint can be removed or weakened via an implicit conversion. An attempt to add
+or strengthen a performance constraint is unsafe and results in a warning. The rules for this
+are comparable to those for ``noexcept`` in C++17 and later.
+
+.. code-block:: c++
+
+  void unannotated();
+  void nonblocking() [[clang::nonblocking]];
+  void nonallocating() [[clang::nonallocating]];
+
+  void example()
+  {
+    // It's fine to remove a performance constraint.
+    void (*fp_plain)();
+    fp_plain = unannotated;
+    fp_plain = nonblocking;
+    fp_plain = nonallocating;
+
+    // Adding/spoofing nonblocking is unsafe.
+    void (*fp_nonblocking)() [[clang::nonblocking]];
+    fp_nonblocking = nullptr;
+    fp_nonblocking = nonblocking;
+    fp_nonblocking = unannotated;
+    // ^ warning: attribute 'nonblocking' should not be added via type conversion
+    fp_nonblocking = nonallocating;
+    // ^ warning: attribute 'nonblocking' should not be added via type conversion
+
+    // Adding/spoofing nonallocating is unsafe.
+    void (*fp_nonallocating)() [[clang::nonallocating]];
+    fp_nonallocating = nullptr;
+    fp_nonallocating = nonallocating;
+    fp_nonallocating = nonblocking; // no warning because nonblocking includes nonallocating
+    fp_nonallocating = unannotated;
+    // ^ warning: attribute 'nonallocating' should not be added via type conversion
+  }
+
+Virtual methods
+---------------
+
+In C++, when a virtual method has a performance constraint, overriding methods in
+subclasses inherit the constraint.
+
+.. code-block:: c++
+
+  struct Base {
+    virtual void unsafe();
+    virtual void safe() noexcept [[clang::nonblocking]];
+  };
+
+  struct Derived : public Base {
+    void unsafe() [[clang::nonblocking]] override;
+    // It's okay for an overridden method to be more constrained
+
+    void safe() noexcept override;
+    // This method is implicitly declared `nonblocking`, inherited from Base.
+  };
+
+Redeclarations, overloads, and name mangling
+--------------------------------------------
+
+The ``nonblocking`` and ``nonallocating`` attributes, like ``noexcept``, do not factor into
+argument-dependent lookup and overloaded functions/methods.
+
+First, consider that ``noexcept`` is integral to a function's type:
+
+.. code-block:: c++
+
+  void f1(int);
+  void f1(int) noexcept;
+  // error: exception specification in declaration does not match previous
+  //   declaration
+
+Unlike ``noexcept``, a redeclaration of ``f2`` with an added or stronger performance constraint is
+legal and propagates the attribute to the previous declaration:
+
+.. code-block:: c++
+
+  int f2();
+  int f2() [[clang::nonblocking]]; // redeclaration with stronger constraint is OK.
+
+This greatly eases adoption by making it possible to annotate functions in external libraries
+without modifying library headers.
+
+A redeclaration with a removed or weaker performance constraint produces a warning, paralleling
+the behavior of ``noexcept``:
+
+.. code-block:: c++
+
+  int f2() { return 42; }
+  // warning: attribute 'nonblocking' on function does not match previous declaration
+
+In C++14, the following two declarations of ``f3`` are identical (a single function). In C++17 they
+are separate overloads:
+
+.. code-block:: c++
+
+  void f3(void (*)());
+  void f3(void (*)() noexcept);
+
+Similarly, the following two declarations of ``f4`` are separate overloads. This pattern may pose
+difficulties due to ambiguity:
+
+.. code-block:: c++
+
+  void f4(void (*)());
+  void f4(void (*)() [[clang::nonblocking]]);
+
+The attributes have no effect on the mangling of function and method names.
+
+Objective-C
+-----------
+
+The attributes are currently unsupported on Objective-C methods.
+
+Analysis and warnings
+=====================
+
+Constraints
+-----------
+
+Functions declared ``nonallocating`` or ``nonblocking``, when defined, are verified according to the
+following rules. Such functions:
+
+1. May not allocate or deallocate memory on the heap. The analysis follows the calls to
+   ``operator new`` and ``operator delete`` generated by the ``new`` and ``delete`` keywords, and
+   treats them like any other function call. The global ``operator new`` and ``operator delete``
+   aren't declared ``nonblocking`` or ``nonallocating`` and so they are considered unsafe. (This
+   is correct because most memory allocators are not lock-free. Note that the placement form of
+   ``operator new`` is implemented inline in libc++'s ``<new>`` header, and is verifiably
+   ``nonblocking``, since it merely casts the supplied pointer to the result type.)
+
+2. May not throw or catch exceptions. To throw, the compiler must allocate the exception on the
+   heap. (Also, many subclasses of ``std::exception`` allocate a string). Exceptions are
+   deallocated when caught.
+
+3. May not make any indirect function call, via a virtual method, function pointer, or
+   pointer-to-member function, unless the target is explicitly declared with the same
+   ``nonblocking`` or ``nonallocating`` attribute (or stronger).
+
+4. May not make direct calls to any other function, with the following exceptions:
+
+  a. The callee is also explicitly declared with the same ``nonblocking`` or ``nonallocating``
+     attribute (or stronger).
+  b. The callee is defined in the same translation unit as the caller, does not have the ``false``
+     form of the required attribute, and can be verified to have the same attribute or stronger,
+     according to these same rules.
+  c. The callee is a built-in function that is known not to block or allocate.
+  d. The callee is declared ``noreturn`` and, if compiling C++, the callee is also declared
+     ``noexcept``. This special case excludes functions such as ``abort()`` and ``std::terminate()``
+     from the analysis. (The reason for requiring ``noexcept`` in C++ is that a function declared
+     ``noreturn`` could be a wrapper for ``throw``.)
+
+5. May not invoke or access an Objective-C method or property, since ``objc_msgSend()`` calls into
+   the Objective-C runtime, which may allocate memory or otherwise block.
+
+6. May not access thread-local variables. Typically, thread-local variables are allocated on the
+   heap when first accessed.
+
+Functions declared ``nonblocking`` have an additional constraint:
+
+7. May not declare static local variables (e.g. Meyers singletons). The compiler generates a lock
+   protecting the initialization of the variable.
+
+Violations of any of these rules result in warnings, in the ``-Wfunction-effects`` category:
+
+.. code-block:: c++
+
+  void notInline();
+
+  void example() [[clang::nonblocking]]
+  {
+    auto* x = new int;
+    // warning: function with 'nonblocking' attribute must not allocate or deallocate
+    //   memory
+
+    if (x == nullptr) {
+      static Logger* logger = createLogger();
+      // warning: function with 'nonblocking' attribute must not have static local variables
+
+      throw std::runtime_error{ "null" };
+      // warning: 'nonblocking' function 'example' must not throw exceptions
+    }
+    notInline();
+    // warning: function with 'nonblocking' attribute must not call non-'nonblocking' function
+    //   'notInline'
+    // note (on notInline()): declaration cannot be inferred 'nonblocking' because it has no
+    //   definition in this translation unit
+  }
+
+Inferring ``nonblocking`` or ``nonallocating``
+----------------------------------------------
+
+In the absence of a ``nonblocking`` or ``nonallocating`` attribute (whether ``true`` or ``false``),
+a function that is called from a performance-constrained function may be analyzed to
+infer whether it has a desired attribute. This analysis happens when the function is not a virtual
+method, and it has a visible definition within the current translation unit (i.e. its body can be
+traversed).
+
+.. code-block:: c++
+
+  void notInline();
+  int implicitlySafe() { return 42; }
+  void implicitlyUnsafe() { notInline(); }
+
+  void example() [[clang::nonblocking]]
+  {
+    int x = implicitlySafe(); // OK
+    implicitlyUnsafe();
+    // warning: function with 'nonblocking' attribute must not call non-'nonblocking' function
+    //   'implicitlyUnsafe'
+    // note (on implicitlyUnsafe): function cannot be inferred 'nonblocking' because it calls
+    //   non-'nonblocking' function 'notInline'
+    // note (on notInline()): declaration cannot be inferred 'nonblocking' because it has no
+    //   definition in this translation unit
+  }
+
+Lambdas and blocks
+------------------
+
+As mentioned earlier, the performance constraint attributes apply only to a single function and not
+to any code nested inside it, including blocks, lambdas, and local classes. It is possible for a
+nonblocking function to schedule the execution of a blocking lambda on another thread. Similarly, a
+blocking function may create a ``nonblocking`` lambda for use in a realtime context.
+
+Operations which create, destroy, copy, and move lambdas and blocks are analyzed in terms of the
+underlying function calls. For example, the creation of a lambda with captures generates a function
+call to an anonymous struct's constructor, passing the captures as parameters.
+
+Implicit function calls in the AST
+----------------------------------
+
+The ``nonblocking`` / ``nonallocating`` analysis occurs at the Sema phase of analysis in Clang.
+During Sema, there are some constructs which will eventually become function calls, but do not
+appear as function calls in the AST. For example, ``auto* foo = new Foo;`` becomes a declaration
+containing a ``CXXNewExpr`` which is understood as a function call to the global ``operator new``
+(in this example), and a ``CXXConstructExpr``, which, for analysis purposes, is a function call to
+``Foo``'s constructor. Most gaps in the analysis would be due to incomplete knowledge of AST
+constructs which become function calls.
+
+Disabling diagnostics
+---------------------
+
+Function effect diagnostics are controlled by ``-Wfunction-effects``.
+
+A construct like this can be used to exempt code from the checks described here:
+
+.. code-block:: c++
+
+  #define NONBLOCKING_UNSAFE(...)                                    \
+    _Pragma("clang diagnostic push")                                 \
+    _Pragma("clang diagnostic ignored \"-Wunknown-warning-option\"") \
+    _Pragma("clang diagnostic ignored \"-Wfunction-effects\"")       \
+    __VA_ARGS__                                                      \
+    _Pragma("clang diagnostic pop")
+
+Disabling the diagnostic allows for:
+
+- constructs which do block, but which in practice are used in ways to avoid unbounded blocking,
+  e.g. a thread pool with semaphores to coordinate multiple realtime threads;
+- using libraries which are safe but not yet annotated;
+- incremental adoption in a large codebase.
+
+Adoption
+========
+
+There are a few common issues that arise when adopting the ``nonblocking`` and ``nonallocating``
+attributes.
+
+C++ exceptions
+--------------
+
+Exceptions pose a challenge to the adoption of the performance constraints. Common library functions
+which throw exceptions include:
+
++----------------------------------+-----------------------------------------------------------------------+
+| Method                           | Alternative                                                           |
++==================================+=======================================================================+
+| ``std::vector<T>::at()``         | ``operator[](size_t)``, after verifying that the index is in range.   |
++----------------------------------+-----------------------------------------------------------------------+
+| ``std::optional<T>::value()``    | ``operator*``, after checking ``has_value()`` or ``operator bool()``. |
++----------------------------------+-----------------------------------------------------------------------+
+| ``std::expected<T, E>::value()`` | Same as for ``std::optional<T>::value()``.                            |
++----------------------------------+-----------------------------------------------------------------------+
+
+
+``std::function<R(Args...)>``
+-----------------------------
+
+``std::function<R(Args...)>`` is generally incompatible with ``nonblocking`` and ``nonallocating``
+code, because a typical implementation may allocate heap memory in the constructor.
+
+Alternatives:
+
+- ``std::function_ref`` (available in C++26 or as ``llvm::function_ref``). This is appropriate and
+  optimal when a functor's lifetime does not need to extend past the function that created it.
+
+- ``inplace_function`` from SG14. This solves the allocation problem by giving the functor wrapper
+  a fixed size known at compile time and using an inline buffer.
+
+While these alternatives both address the heap allocation of ``std::function``, they are still
+obstacles to ``nonblocking/nonallocating`` verification, for reasons detailed in the next section.
+
+
+Interactions with type-erasure techniques
+-----------------------------------------
+
+``std::function<R(Args...)>`` illustrates a common C++ type-erasure technique. Using template
+argument deduction, it decomposes a function type into its return and parameter types. Additional
+components of the function type, including ``noexcept``, ``nonblocking``, ``nonallocating``, and any
+other attributes, are discarded.
+
+Standard library support for these components of a function type is not immediately forthcoming.
+
+Code can work around this limitation in either of two ways:
+
+1. Avoid abstractions like ``std::function`` and instead work directly with the original lambda type.
+
+2. Create a specialized alternative, e.g. ``nonblocking_function_ref<R(Args...)>`` where all function
+   pointers used in the implementation and its interface are ``nonblocking``.
+
+As an example of the first approach, when using a lambda as a *Callable* template parameter, the
+attribute is preserved:
+
+.. code-block:: c++
+
+  std::sort(vec.begin(), vec.end(),
+    [](const Elem& a, const Elem& b) [[clang::nonblocking]] { return a.mem < b.mem; });
+
+Here, the type of the ``Compare`` template parameter is an anonymous class generated from the
+lambda, with an ``operator()`` method holding the ``nonblocking`` attribute.
+
+A complication arises when a *Callable* template parameter, instead of being a lambda or class
+implementing ``operator()``, is a function pointer:
+
+.. code-block:: c++
+
+  static bool compare_elems(const Elem& a, const Elem& b) [[clang::nonblocking]] {
+    return a.mem < b.mem; };
+
+  std::sort(vec.begin(), vec.end(), compare_elems);
+
+Here, the type of ``compare_elems`` is decomposed to ``bool(const Elem&, const Elem&)``, without
+``nonblocking``, when forming the template parameter. This can be solved using the second approach,
+creating a specialized alternative which explicitly requires the attribute. In this case, it's
+possible to use a small wrapper to transform the function pointer into a functor:
+
+.. code-block:: c++
+
+  template <typename>
+  class nonblocking_fp;
+
+  template <typename R, typename... Args>
+  class nonblocking_fp<R(Args...)> {
+  public:
+    using impl_t = R (*)(Args...) [[clang::nonblocking]];
+
+  private:
+    impl_t mImpl{ nullptr };
+  public:
+    nonblocking_fp() = default;
+    nonblocking_fp(impl_t f) : mImpl{ f } {}
+
+    R operator()(Args... args) const
+    {
+      return mImpl(std::forward<Args>(args)...);
+    }
+  };
+
+  // deduction guide (like std::function's)
+  template< class R, class... ArgTypes >
+  nonblocking_fp( R(*)(ArgTypes...) ) -> nonblocking_fp<R(ArgTypes...)>;
+
+  // --
+
+  // Wrap the function pointer in a functor which preserves ``nonblocking``.
+  std::sort(vec.begin(), vec.end(), nonblocking_fp{ compare_elems });
+
+Now, the ``nonblocking`` attribute of ``compare_elems`` is verified when it is converted to a
+``nonblocking`` function pointer, as the argument to ``nonblocking_fp``'s constructor. The template
+parameter is the functor class ``nonblocking_fp``.
+
+
+Static local variables
+----------------------
+
+Static local variables are often used for lazily-constructed globals (Meyers singletons). Beyond the
+compiler's use of a lock to ensure thread-safe initialization, it is dangerously easy to
+inadvertently trigger initialization, involving heap allocation, from a ``nonblocking`` or
+``nonallocating`` context.
+
+Generally, such singletons need to be replaced by globals, and care must be taken to ensure their
+initialization before they are used from ``nonblocking`` or ``nonallocating`` contexts.
+
+
+Annotating libraries
+--------------------
+
+It can be surprising that the analysis does not depend on knowledge of any primitives; it simply
+assumes the worst, that all function calls are unsafe unless explicitly marked as safe or able to be
+inferred as safe. With ``nonblocking``, this appears to suffice for all but the most primitive of
+spinlocks.
+
+At least for an operating system's C functions, it is possible to define an override header which
+redeclares safe common functions (e.g. ``pthread_self()``) with the addition of ``nonblocking``.
+This may help in adopting the feature incrementally.
+
+It also helps that many of the functions in the standard C libraries (notably ``<math.h>``)
+are treated as built-in functions by Clang, which the diagnosis understands to be safe.
+
+Much of the C++ standard library consists of inline templated functions which work well with
+inference. A small number of primitives may need explicit ``nonblocking/nonallocating`` attributes.
diff --git a/clang/docs/RealtimeSanitizer.rst b/clang/docs/RealtimeSanitizer.rst
index 103842e055db700..41b8bbb33baf144 100644
--- a/clang/docs/RealtimeSanitizer.rst
+++ b/clang/docs/RealtimeSanitizer.rst
@@ -21,7 +21,7 @@ The runtime slowdown introduced by RealtimeSanitizer is negligible.
 How to build
 ============
 
-Build LLVM/Clang with `CMake <https://llvm.org/docs/CMake.html>` and enable the
+Build LLVM/Clang with `CMake <https://llvm.org/docs/CMake.html>`_ and enable the
 ``compiler-rt`` runtime. An example CMake configuration that will allow for the
 use/testing of RealtimeSanitizer:
 
@@ -183,6 +183,10 @@ A **partial** list of flags RealtimeSanitizer respects:
      - ``true``
      - boolean
      - If set, use the symbolizer to turn virtual addresses to file/line locations. If false, can greatly speed up the error reporting.
+   * - ``suppressions``
+     - ""
+     - path
+     - If set to a valid suppressions file, will suppress issue reporting. See details in "Disabling and suppressing", below.
 
 
 Some issues with flags can be debugged using the ``verbosity=$NUM`` flag:
@@ -194,12 +198,43 @@ Some issues with flags can be debugged using the ``verbosity=$NUM`` flag:
    misspelled_flag
    ...
 
-Disabling
----------
+Disabling and suppressing
+-------------------------
 
-In some circumstances, you may want to suppress error reporting in a specific scope.
+There are multiple ways to disable error reporting when using RealtimeSanitizer.
 
-In C++, this is achieved via  ``__rtsan::ScopedDisabler``. Within the scope where the ``ScopedDisabler`` object is instantiated, all sanitizer error reports are suppressed. This suppression applies to the current scope as well as all invoked functions, including any functions called transitively.
+In general, ``ScopedDisabler`` should be preferred, as it is the most performant.
+
+.. list-table:: Suppression methods
+   :widths: 30 15 15 10 70
+   :header-rows: 1
+
+   * - Method
+     - Specified at?
+     - Scope
+     - Run-time cost
+     - Description
+   * - ``ScopedDisabler``
+     - Compile-time
+     - Stack
+     - Very low
+     - Violations are ignored for the lifetime of the ``ScopedDisabler`` object.
+   * - ``function-name-matches`` suppression
+     - Run-time
+     - Single function
+     - Medium
+     - Suppresses intercepted and ``[[clang::blocking]]`` function calls by name.
+   * - ``call-stack-contains`` suppression
+     - Run-time
+     - Stack
+     - High
+     - Suppresses any stack trace containing the specified pattern.
+    
+
+``ScopedDisabler``
+##################
+
+At compile time, RealtimeSanitizer may be disabled using ``__rtsan::ScopedDisabler``. RTSan ignores any errors originating within the ``ScopedDisabler`` instance variable scope.
 
 .. code-block:: c++
 
@@ -233,6 +268,31 @@ In C, you can use the ``__rtsan_disable()`` and ``rtsan_enable()`` functions to
 
 Each call to ``__rtsan_disable()`` must be paired with a subsequent call to ``__rtsan_enable()`` to restore normal sanitizer functionality. If a corresponding ``rtsan_enable()`` call is not made, the behavior is undefined.
 
+Suppression file
+################
+
+At run-time, suppressions may be specified using a suppressions file passed in ``RTSAN_OPTIONS``. Run-time suppression may be useful if the source cannot be changed.
+
+.. code-block:: console
+
+   > cat suppressions.supp
+   call-stack-contains:MallocViolation
+   call-stack-contains:std::*vector
+   function-name-matches:free
+   function-name-matches:CustomMarkedBlocking*
+   > RTSAN_OPTIONS="suppressions=suppressions.supp" ./a.out
+   ...
+
+Suppressions specified in this file are one of two flavors.
+
+``function-name-matches`` suppresses reporting of any intercepted library call, or function marked ``[[clang::blocking]]`` by name. If, for instance, you know that ``malloc`` is real-time safe on your system, you can disable the check for it via ``function-name-matches:malloc``.
+
+``call-stack-contains`` suppresses reporting of errors in any stack that contains a string matching the pattern specified. For example, suppressing error reporting of any non-real-time-safe behavior in ``std::vector`` may be specified ``call-stack-contains:std::*vector``. You must include symbols in your build for this method to be effective, because unsymbolicated stack traces cannot be matched. ``call-stack-contains`` has the highest run-time cost of any method of suppression.
+
+Patterns may be exact matches or "regex-light" patterns containing special characters such as ``^$*``.
+
+The number of potential errors suppressed via this method may be seen on exit when using the ``print_stats_on_exit`` flag.
+
 Compile-time sanitizer detection
 --------------------------------
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index ed0c0e369fca74c..6085352dfafe6b3 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -46,6 +46,12 @@ code bases.
 
 - The ``clang-rename`` tool has been removed.
 
+- Removed support for RenderScript targets. This technology is
+  `officially deprecated <https://developer.android.com/guide/topics/renderscript/compute>`_
+  and users are encouraged to
+  `migrate to Vulkan <https://developer.android.com/guide/topics/renderscript/migrate>`_
+  or other options.
+
 C/C++ Language Potentially Breaking Changes
 -------------------------------------------
 
@@ -133,6 +139,15 @@ C++ Specific Potentially Breaking Changes
     // Fixed version:
     unsigned operator""_udl_name(unsigned long long);
 
+- Clang will now produce an error diagnostic when [[clang::lifetimebound]] is
+  applied on a parameter of a function that returns void. This was previously
+  ignored and had no effect. (#GH107556)
+
+  .. code-block:: c++
+
+    // Now diagnoses with an error.
+    void f(int& i [[clang::lifetimebound]]);
+
 ABI Changes in This Version
 ---------------------------
 
@@ -302,6 +317,16 @@ Modified Compiler Flags
   the ``promoted`` algorithm for complex division when possible rather than the
   less basic (limited range) algorithm.
 
+- The ``-fveclib`` option has been updated to enable ``-fno-math-errno`` for
+  ``-fveclib=ArmPL`` and ``-fveclib=SLEEF``. This gives Clang more opportunities
+  to utilize these vector libraries. The behavior for all other vector function
+  libraries remains unchanged.
+
+- The ``-Wnontrivial-memaccess`` warning has been updated to also warn about
+  passing a non-trivially-copyable destination parameter to ``memcpy``,
+  ``memset`` and similar functions for which it is a documented undefined
+  behavior.
+
 Removed Compiler Flags
 -------------------------
 
@@ -444,7 +469,8 @@ Bug Fixes in This Version
 - Fixed a crash using ``__array_rank`` on 64-bit targets. (#GH113044).
 - The warning emitted for an unsupported register variable type now points to
   the unsupported type instead of the ``register`` keyword (#GH109776).
-- Fixed a crash when emit ctor for global variant with flexible array init  (#GH113187).
+- Fixed a crash when emit ctor for global variant with flexible array init (#GH113187).
+- Fixed a crash when GNU statement expression contains invalid statement (#GH113468).
 
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -546,7 +572,8 @@ Bug Fixes to C++ Support
 - Clang incorrectly considered a class with an anonymous union member to not be
   const-default-constructible even if a union member has a default member initializer.
   (#GH95854).
-- Fixed an assertion failure when evaluating an invalid expression in an array initializer (#GH112140)
+- Fixed an assertion failure when evaluating an invalid expression in an array initializer. (#GH112140)
+- Fixed an assertion failure in range calculations for conditional throw expressions. (#GH111854)
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -614,6 +641,10 @@ X86 Support
   * Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
   ``*_(mask(z)))_minmax_s[s|d|h]``.
 
+- Supported intrinsics for ``SM4 and AVX10.2``.
+  * Supported SM4 intrinsics of ``_mm512_sm4key4_epi32`` and
+  ``_mm512_sm4rnds4_epi32``.
+
 - All intrinsics in adcintrin.h can now be used in constant expressions.
 
 - All intrinsics in adxintrin.h can now be used in constant expressions.
@@ -676,6 +707,15 @@ NetBSD Support
 WebAssembly Support
 ^^^^^^^^^^^^^^^^^^^
 
+The default target CPU, "generic", now enables the ``-mnontrapping-fptoint``
+and ``-mbulk-memory`` flags, which correspond to the `Bulk Memory Operations`_
+and `Non-trapping float-to-int Conversions`_ language features, which are
+`widely implemented in engines`_.
+
+.. _Bulk Memory Operations: https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md
+.. _Non-trapping float-to-int Conversions: https://github.com/WebAssembly/spec/blob/master/proposals/nontrapping-float-to-int-conversion/Overview.md
+.. _widely implemented in engines: https://webassembly.org/features/
+
 AVR Support
 ^^^^^^^^^^^
 
@@ -759,6 +799,12 @@ Moved checkers
   To detect too large arguments passed to malloc, consider using the checker
   ``alpha.taint.TaintedAlloc``.
 
+- The checkers ``alpha.nondeterminism.PointerSorting`` and
+  ``alpha.nondeterminism.PointerIteration`` were moved to a new bugprone
+  checker named ``bugprone-nondeterministic-pointer-iteration-order``. The
+  original checkers were implemented only using AST matching and make more
+  sense as a single clang-tidy check.
+
 .. _release-notes-sanitizers:
 
 Sanitizers
diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst
index 58dbd686a6dc9fb..87b03438e6e0b97 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -3447,37 +3447,6 @@ Limitations:
 
      More details at the corresponding `GitHub issue <https://github.com/llvm/llvm-project/issues/43459>`_.
 
-.. _alpha-nondeterminism-PointerIteration:
-
-alpha.nondeterminism.PointerIteration (C++)
-"""""""""""""""""""""""""""""""""""""""""""
-Check for non-determinism caused by iterating unordered containers of pointers.
-
-.. code-block:: c
-
- void test() {
-  int a = 1, b = 2;
-  std::unordered_set<int *> UnorderedPtrSet = {&a, &b};
-
-  for (auto i : UnorderedPtrSet) // warn
-    f(i);
- }
-
-.. _alpha-nondeterminism-PointerSorting:
-
-alpha.nondeterminism.PointerSorting (C++)
-"""""""""""""""""""""""""""""""""""""""""
-Check for non-determinism caused by sorting of pointers.
-
-.. code-block:: c
-
- void test() {
-  int a = 1, b = 2;
-  std::vector<int *> V = {&a, &b};
-  std::sort(V.begin(), V.end()); // warn
- }
-
-
 alpha.WebKit
 ^^^^^^^^^^^^
 
diff --git a/clang/docs/index.rst b/clang/docs/index.rst
index 0f6fb36c4d3352b..1096432813fac52 100644
--- a/clang/docs/index.rst
+++ b/clang/docs/index.rst
@@ -27,6 +27,7 @@ Using Clang as a Compiler
    ThreadSafetyAnalysis
    SafeBuffers
    DataFlowAnalysisIntro
+   FunctionEffectAnalysis
    AddressSanitizer
    ThreadSanitizer
    MemorySanitizer
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index a4d36f2eacd5d1b..07b4e36f3ef05e5 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -239,7 +239,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
   mutable llvm::ContextualFoldingSet<DependentTemplateSpecializationType,
                                      ASTContext&>
     DependentTemplateSpecializationTypes;
-  llvm::FoldingSet<PackExpansionType> PackExpansionTypes;
+  mutable llvm::FoldingSet<PackExpansionType> PackExpansionTypes;
   mutable llvm::FoldingSet<ObjCObjectTypeImpl> ObjCObjectTypes;
   mutable llvm::FoldingSet<ObjCObjectPointerType> ObjCObjectPointerTypes;
   mutable llvm::FoldingSet<DependentUnaryTransformType>
@@ -1778,13 +1778,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
       ElaboratedTypeKeyword Keyword, NestedNameSpecifier *NNS,
       const IdentifierInfo *Name, ArrayRef<TemplateArgument> Args) const;
 
-  TemplateArgument getInjectedTemplateArg(NamedDecl *ParamDecl);
-
-  /// Get a template argument list with one argument per template parameter
-  /// in a template parameter list, such as for the injected class name of
-  /// a class template.
-  void getInjectedTemplateArgs(const TemplateParameterList *Params,
-                               SmallVectorImpl<TemplateArgument> &Args);
+  TemplateArgument getInjectedTemplateArg(NamedDecl *ParamDecl) const;
 
   /// Form a pack expansion type with the given pattern.
   /// \param NumExpansions The number of expansions for the pack, if known.
@@ -1795,7 +1789,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
   ///        if this is the canonical type of another pack expansion type.
   QualType getPackExpansionType(QualType Pattern,
                                 std::optional<unsigned> NumExpansions,
-                                bool ExpectPackInType = true);
+                                bool ExpectPackInType = true) const;
 
   QualType getObjCInterfaceType(const ObjCInterfaceDecl *Decl,
                                 ObjCInterfaceDecl *PrevDecl = nullptr) const;
diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h
index 0f0c0bf6e4ef4f5..a572e3380f16550 100644
--- a/clang/include/clang/AST/DeclTemplate.h
+++ b/clang/include/clang/AST/DeclTemplate.h
@@ -71,6 +71,9 @@ NamedDecl *getAsNamedDecl(TemplateParameter P);
 class TemplateParameterList final
     : private llvm::TrailingObjects<TemplateParameterList, NamedDecl *,
                                     Expr *> {
+  /// The template argument list of the template parameter list.
+  TemplateArgument *InjectedArgs = nullptr;
+
   /// The location of the 'template' keyword.
   SourceLocation TemplateLoc;
 
@@ -196,6 +199,9 @@ class TemplateParameterList final
 
   bool hasAssociatedConstraints() const;
 
+  /// Get the template argument list of the template parameter list.
+  ArrayRef<TemplateArgument> getInjectedTemplateArgs(const ASTContext &Context);
+
   SourceLocation getTemplateLoc() const { return TemplateLoc; }
   SourceLocation getLAngleLoc() const { return LAngleLoc; }
   SourceLocation getRAngleLoc() const { return RAngleLoc; }
@@ -793,15 +799,6 @@ class RedeclarableTemplateDecl : public TemplateDecl,
     /// The first value in the array is the number of specializations/partial
     /// specializations that follow.
     GlobalDeclID *LazySpecializations = nullptr;
-
-    /// The set of "injected" template arguments used within this
-    /// template.
-    ///
-    /// This pointer refers to the template arguments (there are as
-    /// many template arguments as template parameters) for the
-    /// template, and is allocated lazily, since most templates do not
-    /// require the use of this information.
-    TemplateArgument *InjectedArgs = nullptr;
   };
 
   /// Pointer to the common data shared by all declarations of this
@@ -927,7 +924,10 @@ class RedeclarableTemplateDecl : public TemplateDecl,
   /// Although the C++ standard has no notion of the "injected" template
   /// arguments for a template, the notion is convenient when
   /// we need to perform substitutions inside the definition of a template.
-  ArrayRef<TemplateArgument> getInjectedTemplateArgs();
+  ArrayRef<TemplateArgument>
+  getInjectedTemplateArgs(const ASTContext &Context) const {
+    return getTemplateParameters()->getInjectedTemplateArgs(Context);
+  }
 
   using redecl_range = redeclarable_base::redecl_range;
   using redecl_iterator = redeclarable_base::redecl_iterator;
@@ -2087,10 +2087,6 @@ class ClassTemplatePartialSpecializationDecl
   /// The list of template parameters
   TemplateParameterList *TemplateParams = nullptr;
 
-  /// The set of "injected" template arguments used within this
-  /// partial specialization.
-  TemplateArgument *InjectedArgs = nullptr;
-
   /// The class template partial specialization from which this
   /// class template partial specialization was instantiated.
   ///
@@ -2136,9 +2132,11 @@ class ClassTemplatePartialSpecializationDecl
     return TemplateParams;
   }
 
-  /// Retrieve the template arguments list of the template parameter list
-  /// of this template.
-  ArrayRef<TemplateArgument> getInjectedTemplateArgs();
+  /// Get the template argument list of the template parameter list.
+  ArrayRef<TemplateArgument>
+  getInjectedTemplateArgs(const ASTContext &Context) const {
+    return getTemplateParameters()->getInjectedTemplateArgs(Context);
+  }
 
   /// \brief All associated constraints of this partial specialization,
   /// including the requires clause and any constraints derived from
@@ -2864,10 +2862,6 @@ class VarTemplatePartialSpecializationDecl
   /// The list of template parameters
   TemplateParameterList *TemplateParams = nullptr;
 
-  /// The set of "injected" template arguments used within this
-  /// partial specialization.
-  TemplateArgument *InjectedArgs = nullptr;
-
   /// The variable template partial specialization from which this
   /// variable template partial specialization was instantiated.
   ///
@@ -2914,9 +2908,11 @@ class VarTemplatePartialSpecializationDecl
     return TemplateParams;
   }
 
-  /// Retrieve the template arguments list of the template parameter list
-  /// of this template.
-  ArrayRef<TemplateArgument> getInjectedTemplateArgs();
+  /// Get the template argument list of the template parameter list.
+  ArrayRef<TemplateArgument>
+  getInjectedTemplateArgs(const ASTContext &Context) const {
+    return getTemplateParameters()->getInjectedTemplateArgs(Context);
+  }
 
   /// \brief All associated constraints of this partial specialization,
   /// including the requires clause and any constraints derived from
diff --git a/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h b/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h
index 9d81cacb507351a..713494178b97bdb 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.h
@@ -37,6 +37,14 @@ struct UncheckedOptionalAccessModelOptions {
   /// can't identify when their results are used safely (across calls),
   /// resulting in false positives in all such cases. Note: this option does not
   /// cover access through `operator[]`.
+  /// FIXME: we currently cache and equate the result of const accessors
+  /// returning pointers, so cover the case of operator-> followed by
+  /// operator->, which covers the common case of smart pointers. We also cover
+  /// some limited cases of returning references (if return type is an optional
+  /// type), so cover some cases of operator* followed by operator*. We don't
+  /// cover mixing operator-> and operator*. Once we are confident in this const
+  /// accessor caching, we shouldn't need the IgnoreSmartPointerDereference
+  /// option anymore.
   bool IgnoreSmartPointerDereference = false;
 };
 
diff --git a/clang/include/clang/Analysis/FlowSensitive/NoopLattice.h b/clang/include/clang/Analysis/FlowSensitive/NoopLattice.h
index 0938091cd689f02..96c695473b67a19 100644
--- a/clang/include/clang/Analysis/FlowSensitive/NoopLattice.h
+++ b/clang/include/clang/Analysis/FlowSensitive/NoopLattice.h
@@ -14,6 +14,7 @@
 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_NOOP_LATTICE_H
 
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+#include "clang/Support/Compiler.h"
 #include "llvm/ADT/Any.h"
 #include <ostream>
 
diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 25abf5f3f86b7d2..62f6087e9624662 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -107,7 +107,6 @@
   AARCH64_VECTOR_TYPE(Name, MangledName, Id, SingletonId)
 #endif
 
-
 //===- Vector point types -----------------------------------------------===//
 
 SVE_VECTOR_TYPE_INT("__SVInt8_t",  "__SVInt8_t",  SveInt8,  SveInt8Ty, 16,  8, 1, true)
@@ -201,6 +200,7 @@ SVE_PREDICATE_TYPE_ALL("__clang_svboolx4_t", "svboolx4_t", SveBoolx4, SveBoolx4T
 
 SVE_OPAQUE_TYPE("__SVCount_t", "__SVCount_t", SveCount, SveCountTy)
 
+AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8_t", "__MFloat8_t", MFloat8, MFloat8Ty, 1, 8, 1)
 AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x8_t", "__MFloat8x8_t", MFloat8x8, MFloat8x8Ty, 8, 8, 1)
 AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x16_t", "__MFloat8x16_t", MFloat8x16, MFloat8x16Ty, 16, 8, 1)
 
diff --git a/clang/include/clang/Basic/AMDGPUTypes.def b/clang/include/clang/Basic/AMDGPUTypes.def
index e47e544fdc82c1c..d3dff446f9edf01 100644
--- a/clang/include/clang/Basic/AMDGPUTypes.def
+++ b/clang/include/clang/Basic/AMDGPUTypes.def
@@ -15,7 +15,15 @@
   AMDGPU_TYPE(Name, Id, SingletonId, Width, Align)
 #endif
 
+#ifndef AMDGPU_NAMED_BARRIER_TYPE
+#define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope) \
+  AMDGPU_TYPE(Name, Id, SingletonId, Width, Align)
+#endif
+
 AMDGPU_OPAQUE_PTR_TYPE("__amdgpu_buffer_rsrc_t", AMDGPUBufferRsrc, AMDGPUBufferRsrcTy, 128, 128, 8)
 
+AMDGPU_NAMED_BARRIER_TYPE("__amdgpu_named_workgroup_barrier_t", AMDGPUNamedWorkgroupBarrier, AMDGPUNamedWorkgroupBarrierTy, 128, 32, 0)
+
 #undef AMDGPU_TYPE
 #undef AMDGPU_OPAQUE_PTR_TYPE
+#undef AMDGPU_NAMED_BARRIER_TYPE
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 0259b6e40ca9623..47c93b48175fc85 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -411,7 +411,6 @@ def SYCL : LangOpt<"SYCLIsDevice">;
 def COnly : LangOpt<"", "!LangOpts.CPlusPlus">;
 def CPlusPlus : LangOpt<"CPlusPlus">;
 def OpenCL : LangOpt<"OpenCL">;
-def RenderScript : LangOpt<"RenderScript">;
 def ObjC : LangOpt<"ObjC">;
 def BlocksSupported : LangOpt<"Blocks">;
 def ObjCAutoRefCount : LangOpt<"ObjCAutoRefCount">;
@@ -1629,14 +1628,6 @@ def OpenCLNoSVM : Attr {
   let ASTNode = 0;
 }
 
-def RenderScriptKernel : Attr {
-  let Spellings = [GNU<"kernel">];
-  let Subjects = SubjectList<[Function]>;
-  let Documentation = [RenderScriptKernelAttributeDocs];
-  let LangOpts = [RenderScript];
-  let SimpleHandler = 1;
-}
-
 def Deprecated : InheritableAttr {
   let Spellings = [GCC<"deprecated">, Declspec<"deprecated">,
                    CXX11<"","deprecated", 201309>,
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index ee8126cadae2322..fbbfc4acdf391ef 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -3702,20 +3702,32 @@ user-declared functions. For example:
 
 .. code-block:: c++
 
+    #include <map>
+    #include <string>
+
+    using namespace std::literals;
+
     // Returns m[key] if key is present, or default_value if not.
     template<typename T, typename U>
     const U &get_or_default(const std::map<T, U> &m [[clang::lifetimebound]],
                             const T &key, /* note, not lifetimebound */
-                            const U &default_value [[clang::lifetimebound]]);
+                            const U &default_value [[clang::lifetimebound]]) {
+      if (auto iter = m.find(key); iter != m.end()) return iter->second;
+      else return default_value;
+    }
 
-    std::map<std::string, std::string> m;
-    // warning: temporary "bar"s that might be bound to local reference 'val'
-    // will be destroyed at the end of the full-expression
-    const std::string &val = get_or_default(m, "foo"s, "bar"s);
+    int main() {
+      std::map<std::string, std::string> m;
+      // warning: temporary bound to local reference 'val1' will be destroyed
+      // at the end of the full-expression
+      const std::string &val1 = get_or_default(m, "foo"s, "bar"s);
 
-    // No warning in this case.
-    std::string def_val = "bar"s;
-    const std::string &val = get_or_default(m, "foo"s, def_val);
+      // No warning in this case.
+      std::string def_val = "bar"s;
+      const std::string &val2 = get_or_default(m, "foo"s, def_val);
+
+      return 0;
+    }
 
 The attribute can be applied to the implicit ``this`` parameter of a member
 function by writing the attribute after the function type:
@@ -5831,21 +5843,6 @@ provided with the regular ``visibility`` attribute.
   }];
 }
 
-def RenderScriptKernelAttributeDocs : Documentation {
-  let Category = DocCatFunction;
-  let Content = [{
-``__attribute__((kernel))`` is used to mark a ``kernel`` function in
-RenderScript.
-
-In RenderScript, ``kernel`` functions are used to express data-parallel
-computations. The RenderScript runtime efficiently parallelizes ``kernel``
-functions to run on computational resources such as multi-core CPUs and GPUs.
-See the RenderScript_ documentation for more information.
-
-.. _RenderScript: https://developer.android.com/guide/topics/renderscript/compute.html
-  }];
-}
-
 def XRayDocs : Documentation {
   let Category = DocCatFunction;
   let Heading = "xray_always_instrument, xray_never_instrument, xray_log_args";
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 90475a361bb8f86..9bd67e0cefebc32 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -4871,6 +4871,12 @@ def HLSLRadians : LangBuiltin<"HLSL_LANG"> {
   let Prototype = "void(...)";
 }
 
+def HLSLSplitDouble: LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_splitdouble"];
+  let Attributes = [NoThrow, Const];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 4c6b22cca421cab..4486eb73a11fa6a 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2179,6 +2179,10 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 
+// SM4_EVEX
+TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+
 // AVX10 MINMAX
 TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 8e4718008ece726..34ff49d7238a7f5 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -795,6 +795,10 @@ def warn_cstruct_memaccess : Warning<
   "%1 call is a pointer to record %2 that is not trivial to "
   "%select{primitive-default-initialize|primitive-copy}3">,
   InGroup<NonTrivialMemaccess>;
+def warn_cxxstruct_memaccess : Warning<
+  "first argument in call to "
+  "%0 is a pointer to non-trivially copyable type %1">,
+  InGroup<NonTrivialMemaccess>;
 def note_nontrivial_field : Note<
   "field is non-trivial to %select{copy|default-initialize}0">;
 def err_non_trivial_c_union_in_invalid_context : Error<
@@ -10097,6 +10101,9 @@ def err_lifetimebound_no_object_param : Error<
 def err_lifetimebound_ctor_dtor : Error<
   "'lifetimebound' attribute cannot be applied to a "
   "%select{constructor|destructor}0">;
+def err_lifetimebound_void_return_type : Error<
+  "'lifetimebound' attribute cannot be applied to a parameter of a function "
+  "that returns void">;
 
 // CHECK: returning address/reference of stack memory
 def warn_ret_stack_addr_ref : Warning<
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index 68db400c22e6c1c..942fc557c5b9496 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -280,7 +280,6 @@ LANGOPT(OpenMPNoNestedParallelism  , 1, 0, "Assume that no thread in a parallel
 LANGOPT(OpenMPOffloadMandatory  , 1, 0, "Assert that offloading is mandatory and do not create a host fallback.")
 LANGOPT(OpenMPForceUSM     , 1, 0, "Enable OpenMP unified shared memory mode via compiler.")
 LANGOPT(NoGPULib  , 1, 0, "Indicate a build without the standard GPU libraries.")
-LANGOPT(RenderScript      , 1, 0, "RenderScript")
 
 LANGOPT(HLSL, 1, 0, "HLSL")
 ENUM_LANGOPT(HLSLVersion, HLSLLangStd, 16, HLSL_Unset, "HLSL Version")
diff --git a/clang/include/clang/Basic/LangStandard.h b/clang/include/clang/Basic/LangStandard.h
index 56a0d2c95e2b191..49412232c9c5edd 100644
--- a/clang/include/clang/Basic/LangStandard.h
+++ b/clang/include/clang/Basic/LangStandard.h
@@ -39,7 +39,6 @@ enum class Language : uint8_t {
   OpenCL,
   OpenCLCXX,
   CUDA,
-  RenderScript,
   HIP,
   HLSL,
   ///@}
diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index 9c5d33fbb562cc9..dd384c1d76c5fde 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -227,7 +227,7 @@ class alignas(8) Module {
 
   /// A mapping from the submodule name to the index into the
   /// \c SubModules vector at which that submodule resides.
-  llvm::StringMap<unsigned> SubModuleIndex;
+  mutable llvm::StringMap<unsigned> SubModuleIndex;
 
   /// The AST file if this is a top-level module which has a
   /// corresponding serialized AST file, or null otherwise.
@@ -253,8 +253,6 @@ class alignas(8) Module {
     HK_PrivateTextual,
     HK_Excluded
   };
-  static const int NumHeaderKinds = HK_Excluded + 1;
-
   /// Information about a header directive as found in the module map
   /// file.
   struct Header {
@@ -263,17 +261,36 @@ class alignas(8) Module {
     FileEntryRef Entry;
   };
 
-  /// Information about a directory name as found in the module map
-  /// file.
+private:
+  static const int NumHeaderKinds = HK_Excluded + 1;
+  // The begin index of a HeaderKind is also the end index of HeaderKind - 1.
+  // The extra element at the end acts as the end index of the last HeaderKind.
+  unsigned HeaderKindBeginIndex[NumHeaderKinds + 1] = {};
+  SmallVector<Header, 2> HeadersStorage;
+
+public:
+  ArrayRef<Header> getAllHeaders() const { return HeadersStorage; }
+  ArrayRef<Header> getHeaders(HeaderKind HK) const {
+    assert(HK < NumHeaderKinds && "Invalid Module::HeaderKind");
+    auto BeginIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK];
+    auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1];
+    return {BeginIt, EndIt};
+  }
+  void addHeader(HeaderKind HK, Header H) {
+    assert(HK < NumHeaderKinds && "Invalid Module::HeaderKind");
+    auto EndIt = HeadersStorage.begin() + HeaderKindBeginIndex[HK + 1];
+    HeadersStorage.insert(EndIt, std::move(H));
+    for (unsigned HKI = HK + 1; HKI != NumHeaderKinds + 1; ++HKI)
+      ++HeaderKindBeginIndex[HKI];
+  }
+
+  /// Information about a directory name as found in the module map file.
   struct DirectoryName {
     std::string NameAsWritten;
     std::string PathRelativeToRootModuleDirectory;
     DirectoryEntryRef Entry;
   };
 
-  /// The headers that are part of this module.
-  SmallVector<Header, 2> Headers[5];
-
   /// Stored information about a header directive that was found in the
   /// module map file but has not been resolved to a file.
   struct UnresolvedHeaderDirective {
@@ -595,7 +612,6 @@ class alignas(8) Module {
   void setParent(Module *M) {
     assert(!Parent);
     Parent = M;
-    Parent->SubModuleIndex[Name] = Parent->SubModules.size();
     Parent->SubModules.push_back(this);
   }
 
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 17262d5968b12d4..25eda907d20a7bf 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -262,9 +262,6 @@ class TargetInfo : public TransferrableTargetInfo,
   LLVM_PREFERRED_TYPE(bool)
   unsigned HasBuiltinMSVaList : 1;
 
-  LLVM_PREFERRED_TYPE(bool)
-  unsigned IsRenderScriptTarget : 1;
-
   LLVM_PREFERRED_TYPE(bool)
   unsigned HasAArch64SVETypes : 1;
 
@@ -1031,9 +1028,6 @@ class TargetInfo : public TransferrableTargetInfo,
   /// available on this target.
   bool hasBuiltinMSVaList() const { return HasBuiltinMSVaList; }
 
-  /// Returns true for RenderScript.
-  bool isRenderScriptTarget() const { return IsRenderScriptTarget; }
-
   /// Returns whether or not the AArch64 SVE built-in types are
   /// available on this target.
   bool hasAArch64SVETypes() const { return HasAArch64SVETypes; }
diff --git a/clang/include/clang/CodeGen/CGFunctionInfo.h b/clang/include/clang/CodeGen/CGFunctionInfo.h
index d19f84d198876f5..9d785d878b61dcc 100644
--- a/clang/include/clang/CodeGen/CGFunctionInfo.h
+++ b/clang/include/clang/CodeGen/CGFunctionInfo.h
@@ -271,12 +271,8 @@ class ABIArgInfo {
     // in the unpadded type.
     unsigned unpaddedIndex = 0;
     for (auto eltType : coerceToType->elements()) {
-      if (isPaddingForCoerceAndExpand(eltType)) continue;
-      if (unpaddedStruct) {
-        assert(unpaddedStruct->getElementType(unpaddedIndex) == eltType);
-      } else {
-        assert(unpaddedIndex == 0 && unpaddedCoerceToType == eltType);
-      }
+      if (isPaddingForCoerceAndExpand(eltType))
+        continue;
       unpaddedIndex++;
     }
 
@@ -295,12 +291,8 @@ class ABIArgInfo {
   }
 
   static bool isPaddingForCoerceAndExpand(llvm::Type *eltType) {
-    if (eltType->isArrayTy()) {
-      assert(eltType->getArrayElementType()->isIntegerTy(8));
-      return true;
-    } else {
-      return false;
-    }
+    return eltType->isArrayTy() &&
+           eltType->getArrayElementType()->isIntegerTy(8);
   }
 
   Kind getKind() const { return TheKind; }
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 5df6ddd5e6a0c5e..9d595984b63c4bf 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -627,7 +627,6 @@ defvar c23 = LangOpts<"C23">;
 defvar lang_std = LangOpts<"LangStd">;
 defvar open_cl = LangOpts<"OpenCL">;
 defvar cuda = LangOpts<"CUDA">;
-defvar render_script = LangOpts<"RenderScript">;
 defvar hip = LangOpts<"HIP">;
 defvar gnu_mode = LangOpts<"GNUMode">;
 defvar asm_preprocessor = LangOpts<"AsmPreprocessor">;
@@ -1787,6 +1786,12 @@ defm debug_info_for_profiling : BoolFOption<"debug-info-for-profiling",
   PosFlag<SetTrue, [], [ClangOption, CC1Option],
           "Emit extra debug info to make sample profile more accurate">,
   NegFlag<SetFalse>>;
+def fprofile_generate_cold_function_coverage : Flag<["-"], "fprofile-generate-cold-function-coverage">, 
+    Group<f_Group>, Visibility<[ClangOption, CLOption]>,
+    HelpText<"Generate instrumented code to collect coverage info for cold functions into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
+def fprofile_generate_cold_function_coverage_EQ : Joined<["-"], "fprofile-generate-cold-function-coverage=">, 
+    Group<f_Group>, Visibility<[ClangOption, CLOption]>, MetaVarName<"<directory>">,
+    HelpText<"Generate instrumented code to collect coverage info for cold functions into <directory>/default.profraw (overridden by LLVM_PROFILE_FILE env var)">; 
 def fprofile_instr_generate : Flag<["-"], "fprofile-instr-generate">,
     Group<f_Group>, Visibility<[ClangOption, CLOption]>,
     HelpText<"Generate instrumented code to collect execution counts into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
@@ -4673,7 +4678,8 @@ def malign_loops_EQ : Joined<["-"], "malign-loops=">, Group<clang_ignored_m_Grou
 def malign_jumps_EQ : Joined<["-"], "malign-jumps=">, Group<clang_ignored_m_Group>;
 
 let Flags = [TargetSpecific] in {
-def mabi_EQ : Joined<["-"], "mabi=">, Group<m_Group>;
+def mabi_EQ : Joined<["-"], "mabi=">, Group<m_Group>,
+  Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>;
 def malign_branch_EQ : CommaJoined<["-"], "malign-branch=">, Group<m_Group>,
   HelpText<"Specify types of branches to align">;
 def malign_branch_boundary_EQ : Joined<["-"], "malign-branch-boundary=">, Group<m_Group>,
@@ -5633,6 +5639,7 @@ def noprebind : Flag<["-"], "noprebind">;
 def noprofilelib : Flag<["-"], "noprofilelib">;
 def noseglinkedit : Flag<["-"], "noseglinkedit">;
 def nostartfiles : Flag<["-"], "nostartfiles">, Group<Link_Group>;
+def startfiles : Flag<["-"], "startfiles">, Group<Link_Group>;
 def nostdinc : Flag<["-"], "nostdinc">,
   Visibility<[ClangOption, CLOption, DXCOption]>, Group<IncludePath_Group>,
   HelpText<"Disable both standard system #include directories and builtin #include directories">;
@@ -5645,6 +5652,9 @@ def nostdincxx : Flag<["-"], "nostdinc++">, Visibility<[ClangOption, CC1Option]>
 def nostdlib : Flag<["-"], "nostdlib">,
   Visibility<[ClangOption, CLOption, FlangOption, DXCOption]>,
   Group<Link_Group>;
+def stdlib : Flag<["-"], "stdlib">,
+  Visibility<[ClangOption, CLOption, FlangOption, DXCOption]>,
+  Group<Link_Group>;
 def nostdlibxx : Flag<["-"], "nostdlib++">;
 def object : Flag<["-"], "object">;
 def o : JoinedOrSeparate<["-"], "o">,
@@ -6787,6 +6797,10 @@ def flang_deprecated_no_hlfir : Flag<["-"], "flang-deprecated-no-hlfir">,
   Flags<[HelpHidden]>, Visibility<[FlangOption, FC1Option]>,
   HelpText<"Do not use HLFIR lowering (deprecated)">;
 
+def flang_experimental_integer_overflow : Flag<["-"], "flang-experimental-integer-overflow">,
+  Flags<[HelpHidden]>, Visibility<[FlangOption, FC1Option]>,
+  HelpText<"Add nsw flag to internal operations such as do-variable increment (experimental)">;
+
 //===----------------------------------------------------------------------===//
 // FLangOption + CoreOption + NoXarchOption
 //===----------------------------------------------------------------------===//
@@ -7350,6 +7364,7 @@ def mabi_EQ_ieeelongdouble : Flag<["-"], "mabi=ieeelongdouble">,
   HelpText<"Use IEEE 754 quadruple-precision for long double">,
   MarshallingInfoFlag<LangOpts<"PPCIEEELongDouble">>;
 def mabi_EQ_vec_extabi : Flag<["-"], "mabi=vec-extabi">,
+  Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
   HelpText<"Enable the extended Altivec ABI on AIX. Use volatile and nonvolatile vector registers">,
   MarshallingInfoFlag<LangOpts<"EnableAIXExtendedAltivecABI">>;
 def mfloat_abi : Separate<["-"], "mfloat-abi">,
@@ -8110,11 +8125,11 @@ def vtordisp_mode_EQ : Joined<["-"], "vtordisp-mode=">,
 def fnative_half_type: Flag<["-"], "fnative-half-type">,
   HelpText<"Use the native half type for __fp16 instead of promoting to float">,
   MarshallingInfoFlag<LangOpts<"NativeHalfType">>,
-  ImpliedByAnyOf<[open_cl.KeyPath, render_script.KeyPath]>;
+  ImpliedByAnyOf<[open_cl.KeyPath]>;
 def fnative_half_arguments_and_returns : Flag<["-"], "fnative-half-arguments-and-returns">,
   HelpText<"Use the native __fp16 type for arguments and returns (and skip ABI-specific lowering)">,
   MarshallingInfoFlag<LangOpts<"NativeHalfArgsAndReturns">>,
-  ImpliedByAnyOf<[open_cl.KeyPath, render_script.KeyPath, hlsl.KeyPath, hip.KeyPath]>;
+  ImpliedByAnyOf<[open_cl.KeyPath, hlsl.KeyPath, hip.KeyPath]>;
 def fdefault_calling_conv_EQ : Joined<["-"], "fdefault-calling-conv=">,
   HelpText<"Set default calling convention">,
   Values<"cdecl,fastcall,stdcall,vectorcall,regcall,rtdcall">,
diff --git a/clang/include/clang/Driver/Types.def b/clang/include/clang/Driver/Types.def
index af186c5df69201b..214c5e7a789f97e 100644
--- a/clang/include/clang/Driver/Types.def
+++ b/clang/include/clang/Driver/Types.def
@@ -55,7 +55,6 @@ TYPE("c++",                      CXX,          PP_CXX,          "cpp",    phases
 TYPE("objective-c++-cpp-output", PP_ObjCXX,    INVALID,         "mii",    phases::Compile, phases::Backend, phases::Assemble, phases::Link)
 TYPE("objc++-cpp-output",        PP_ObjCXX_Alias, INVALID,      "mii",    phases::Compile, phases::Backend, phases::Assemble, phases::Link)
 TYPE("objective-c++",            ObjCXX,       PP_ObjCXX,       "mm",     phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link)
-TYPE("renderscript",             RenderScript, PP_C,            "rs",     phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link)
 TYPE("hlsl",                     HLSL,         PP_CXX,          "hlsl",   phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble)
 
 // C family input files to precompile.
diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h
index 75b567a347cb6cf..53e9e0ec83ddb1f 100644
--- a/clang/include/clang/Lex/ModuleMap.h
+++ b/clang/include/clang/Lex/ModuleMap.h
@@ -546,6 +546,17 @@ class ModuleMap {
   std::pair<Module *, bool> findOrCreateModule(StringRef Name, Module *Parent,
                                                bool IsFramework,
                                                bool IsExplicit);
+  /// Call \c ModuleMap::findOrCreateModule and discard the information about
+  /// whether the module was found or created.
+  Module *findOrCreateModuleFirst(StringRef Name, Module *Parent,
+                                  bool IsFramework, bool IsExplicit) {
+    return findOrCreateModule(Name, Parent, IsFramework, IsExplicit).first;
+  }
+  /// Create new submodule, assuming it does not exist. This function can only
+  /// be called when it is guaranteed that this submodule does not exist yet.
+  /// Parameters have the same semantics as \c ModuleMap::findOrCreateModule.
+  Module *createModule(StringRef Name, Module *Parent, bool IsFramework,
+                       bool IsExplicit);
 
   /// Create a global module fragment for a C++ module unit.
   ///
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index f3f4de044fc41a1..38a527d2324ffe8 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1490,7 +1490,7 @@ class Preprocessor {
   /// Mark the file as included.
   /// Returns true if this is the first time the file was included.
   bool markIncluded(FileEntryRef File) {
-    HeaderInfo.getFileInfo(File);
+    HeaderInfo.getFileInfo(File).IsLocallyIncluded = true;
     return IncludedFiles.insert(File).second;
   }
 
diff --git a/clang/include/clang/Sema/SemaInternal.h b/clang/include/clang/Sema/SemaInternal.h
index d994d1819b44237..41d05b2bfb078eb 100644
--- a/clang/include/clang/Sema/SemaInternal.h
+++ b/clang/include/clang/Sema/SemaInternal.h
@@ -58,7 +58,7 @@ inline InheritableAttr *getDLLAttr(Decl *D) {
 }
 
 /// Retrieve the depth and index of a template parameter.
-inline std::pair<unsigned, unsigned> getDepthAndIndex(NamedDecl *ND) {
+inline std::pair<unsigned, unsigned> getDepthAndIndex(const NamedDecl *ND) {
   if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(ND))
     return std::make_pair(TTP->getDepth(), TTP->getIndex());
 
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 13173dc96e71aed..b6193866fc71346 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -44,7 +44,7 @@ namespace serialization {
 /// Version 4 of AST files also requires that the version control branch and
 /// revision match exactly, since there is no backward compatibility of
 /// AST files at this time.
-const unsigned VERSION_MAJOR = 31;
+const unsigned VERSION_MAJOR = 32;
 
 /// AST file minor version number supported by this version of
 /// Clang.
@@ -54,7 +54,7 @@ const unsigned VERSION_MAJOR = 31;
 /// for the previous version could still support reading the new
 /// version by ignoring new kinds of subblocks), this number
 /// should be increased.
-const unsigned VERSION_MINOR = 1;
+const unsigned VERSION_MINOR = 0;
 
 /// An ID number that refers to an identifier in an AST file.
 ///
@@ -1149,7 +1149,7 @@ enum PredefinedTypeIDs {
 ///
 /// Type IDs for non-predefined types will start at
 /// NUM_PREDEF_TYPE_IDs.
-const unsigned NUM_PREDEF_TYPE_IDS = 511;
+const unsigned NUM_PREDEF_TYPE_IDS = 513;
 
 // Ensure we do not overrun the predefined types we reserved
 // in the enum PredefinedTypeIDs above.
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index b476a40ebd2c8c3..070c1c9a54f48c6 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -2335,6 +2335,8 @@ class ASTReader
   /// Translate a FileID from another module file's FileID space into ours.
   FileID TranslateFileID(ModuleFile &F, FileID FID) const {
     assert(FID.ID >= 0 && "Reading non-local FileID.");
+    if (FID.isInvalid())
+      return FID;
     return FileID::get(F.SLocEntryBaseID + FID.ID - 1);
   }
 
diff --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 349040c15eeb83e..9a6b35c1b9f774e 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -118,8 +118,6 @@ def Debug : Package<"debug">, Hidden;
 
 def CloneDetectionAlpha : Package<"clone">, ParentPackage<Alpha>;
 
-def NonDeterminismAlpha : Package<"nondeterminism">, ParentPackage<Alpha>;
-
 def Fuchsia : Package<"fuchsia">;
 def FuchsiaAlpha : Package<"fuchsia">, ParentPackage<Alpha>;
 
@@ -1711,22 +1709,6 @@ def TaintedDivChecker: Checker<"TaintedDiv">,
 
 } // end "optin.taint"
 
-//===----------------------------------------------------------------------===//
-// NonDeterminism checkers.
-//===----------------------------------------------------------------------===//
-
-let ParentPackage = NonDeterminismAlpha in {
-
-def PointerIterationChecker : Checker<"PointerIteration">,
-  HelpText<"Checks for non-determinism caused by iteration of unordered containers of pointers">,
-  Documentation<HasDocumentation>;
-
-def PointerSortingChecker : Checker<"PointerSorting">,
-  HelpText<"Check for non-determinism caused by sorting of pointers">,
-  Documentation<HasDocumentation>;
-
-} // end alpha.nondeterminism
-
 //===----------------------------------------------------------------------===//
 // Fuchsia checkers.
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 69892bda42b2566..1c3f771f417ccf2 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -5634,7 +5634,7 @@ ASTContext::getDependentTemplateSpecializationType(
   return QualType(T, 0);
 }
 
-TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) {
+TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) const {
   TemplateArgument Arg;
   if (const auto *TTP = dyn_cast<TemplateTypeParmDecl>(Param)) {
     QualType ArgType = getTypeDeclType(TTP);
@@ -5678,23 +5678,15 @@ TemplateArgument ASTContext::getInjectedTemplateArg(NamedDecl *Param) {
   }
 
   if (Param->isTemplateParameterPack())
-    Arg = TemplateArgument::CreatePackCopy(*this, Arg);
+    Arg =
+        TemplateArgument::CreatePackCopy(const_cast<ASTContext &>(*this), Arg);
 
   return Arg;
 }
 
-void
-ASTContext::getInjectedTemplateArgs(const TemplateParameterList *Params,
-                                    SmallVectorImpl<TemplateArgument> &Args) {
-  Args.reserve(Args.size() + Params->size());
-
-  for (NamedDecl *Param : *Params)
-    Args.push_back(getInjectedTemplateArg(Param));
-}
-
 QualType ASTContext::getPackExpansionType(QualType Pattern,
                                           std::optional<unsigned> NumExpansions,
-                                          bool ExpectPackInType) {
+                                          bool ExpectPackInType) const {
   assert((!ExpectPackInType || Pattern->containsUnexpandedParameterPack()) &&
          "Pack expansions must expand one or more parameter packs");
 
diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp
index b7a6c224c80f8e9..513d4512b45cff4 100644
--- a/clang/lib/AST/ByteCode/Interp.cpp
+++ b/clang/lib/AST/ByteCode/Interp.cpp
@@ -400,7 +400,7 @@ bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc) {
 }
 
 static bool CheckConstant(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
-  if (!Ptr.isBlockPointer())
+  if (!Ptr.isStatic() || !Ptr.isBlockPointer())
     return true;
   return CheckConstant(S, OpPC, Ptr.getDeclDesc());
 }
@@ -513,8 +513,8 @@ bool CheckMutable(InterpState &S, CodePtr OpPC, const Pointer &Ptr) {
   return false;
 }
 
-bool CheckVolatile(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
-                   AccessKinds AK) {
+static bool CheckVolatile(InterpState &S, CodePtr OpPC, const Pointer &Ptr,
+                          AccessKinds AK) {
   assert(Ptr.isLive());
 
   // FIXME: This check here might be kinda expensive. Maybe it would be better
@@ -1451,6 +1451,11 @@ bool CheckNewTypeMismatch(InterpState &S, CodePtr OpPC, const Expr *E,
         << StorageType << AllocType;
     return false;
   }
+
+  // Can't activate fields in a union, unless the direct base is the union.
+  if (Ptr.inUnion() && !Ptr.isActive() && !Ptr.getBase().getRecord()->isUnion())
+    return CheckActive(S, OpPC, Ptr, AK_Construct);
+
   return true;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 10e33c14f4b455b..b00d2a1768b6b71 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -1670,6 +1670,15 @@ static bool interp__builtin_operator_delete(InterpState &S, CodePtr OpPC,
       S, OpPC, *AllocForm, DynamicAllocator::Form::Operator, BlockDesc, Source);
 }
 
+static bool interp__builtin_arithmetic_fence(InterpState &S, CodePtr OpPC,
+                                             const InterpFrame *Frame,
+                                             const Function *Func,
+                                             const CallExpr *Call) {
+  const Floating &Arg0 = S.Stk.peek<Floating>();
+  S.Stk.push<Floating>(Arg0);
+  return true;
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
                       const CallExpr *Call, uint32_t BuiltinID) {
   const InterpFrame *Frame = S.Current;
@@ -2111,6 +2120,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F,
       return false;
     break;
 
+  case Builtin::BI__arithmetic_fence:
+    if (!interp__builtin_arithmetic_fence(S, OpPC, Frame, F, Call))
+      return false;
+    break;
+
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
              diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ByteCode/Pointer.h b/clang/lib/AST/ByteCode/Pointer.h
index 72e255dba13f6ba..457fe93b2781757 100644
--- a/clang/lib/AST/ByteCode/Pointer.h
+++ b/clang/lib/AST/ByteCode/Pointer.h
@@ -653,15 +653,6 @@ class Pointer {
     return *reinterpret_cast<T *>(asBlockPointer().Pointee->rawData() + Offset);
   }
 
-  /// Dereferences a primitive element.
-  template <typename T> T &elem(unsigned I) const {
-    assert(I < getNumElems());
-    assert(isBlockPointer());
-    assert(asBlockPointer().Pointee);
-    return reinterpret_cast<T *>(asBlockPointer().Pointee->data() +
-                                 sizeof(InitMapPtr))[I];
-  }
-
   /// Whether this block can be read from at all. This is only true for
   /// block pointers that point to a valid location inside that block.
   bool isDereferencable() const {
diff --git a/clang/lib/AST/DeclTemplate.cpp b/clang/lib/AST/DeclTemplate.cpp
index 4a506b7be456429..755ec72f00bf771 100644
--- a/clang/lib/AST/DeclTemplate.cpp
+++ b/clang/lib/AST/DeclTemplate.cpp
@@ -51,7 +51,7 @@ DefaultTemplateArgumentContainsUnexpandedPack(const TemplateParam &P) {
          P.getDefaultArgument().getArgument().containsUnexpandedParameterPack();
 }
 
-TemplateParameterList::TemplateParameterList(const ASTContext& C,
+TemplateParameterList::TemplateParameterList(const ASTContext &C,
                                              SourceLocation TemplateLoc,
                                              SourceLocation LAngleLoc,
                                              ArrayRef<NamedDecl *> Params,
@@ -244,6 +244,17 @@ bool TemplateParameterList::hasAssociatedConstraints() const {
   return HasRequiresClause || HasConstrainedParameters;
 }
 
+ArrayRef<TemplateArgument>
+TemplateParameterList::getInjectedTemplateArgs(const ASTContext &Context) {
+  if (!InjectedArgs) {
+    InjectedArgs = new (Context) TemplateArgument[size()];
+    llvm::transform(*this, InjectedArgs, [&](NamedDecl *ND) {
+      return Context.getInjectedTemplateArg(ND);
+    });
+  }
+  return {InjectedArgs, NumParams};
+}
+
 bool TemplateParameterList::shouldIncludeTypeForArgument(
     const PrintingPolicy &Policy, const TemplateParameterList *TPL,
     unsigned Idx) {
@@ -396,22 +407,6 @@ void RedeclarableTemplateDecl::addSpecializationImpl(
                                       SETraits::getDecl(Entry));
 }
 
-ArrayRef<TemplateArgument> RedeclarableTemplateDecl::getInjectedTemplateArgs() {
-  TemplateParameterList *Params = getTemplateParameters();
-  auto *CommonPtr = getCommonPtr();
-  if (!CommonPtr->InjectedArgs) {
-    auto &Context = getASTContext();
-    SmallVector<TemplateArgument, 16> TemplateArgs;
-    Context.getInjectedTemplateArgs(Params, TemplateArgs);
-    CommonPtr->InjectedArgs =
-        new (Context) TemplateArgument[TemplateArgs.size()];
-    std::copy(TemplateArgs.begin(), TemplateArgs.end(),
-              CommonPtr->InjectedArgs);
-  }
-
-  return llvm::ArrayRef(CommonPtr->InjectedArgs, Params->size());
-}
-
 //===----------------------------------------------------------------------===//
 // FunctionTemplateDecl Implementation
 //===----------------------------------------------------------------------===//
@@ -631,13 +626,10 @@ ClassTemplateDecl::getInjectedClassNameSpecialization() {
   //  expansion (14.5.3) whose pattern is the name of the template parameter
   //  pack.
   ASTContext &Context = getASTContext();
-  TemplateParameterList *Params = getTemplateParameters();
-  SmallVector<TemplateArgument, 16> TemplateArgs;
-  Context.getInjectedTemplateArgs(Params, TemplateArgs);
   TemplateName Name = Context.getQualifiedTemplateName(
       /*NNS=*/nullptr, /*TemplateKeyword=*/false, TemplateName(this));
-  CommonPtr->InjectedClassNameType =
-      Context.getTemplateSpecializationType(Name, TemplateArgs);
+  CommonPtr->InjectedClassNameType = Context.getTemplateSpecializationType(
+      Name, getTemplateParameters()->getInjectedTemplateArgs(Context));
   return CommonPtr->InjectedClassNameType;
 }
 
@@ -1184,20 +1176,6 @@ SourceRange ClassTemplatePartialSpecializationDecl::getSourceRange() const {
   return Range;
 }
 
-ArrayRef<TemplateArgument>
-ClassTemplatePartialSpecializationDecl::getInjectedTemplateArgs() {
-  TemplateParameterList *Params = getTemplateParameters();
-  auto *First = cast<ClassTemplatePartialSpecializationDecl>(getFirstDecl());
-  if (!First->InjectedArgs) {
-    auto &Context = getASTContext();
-    SmallVector<TemplateArgument, 16> TemplateArgs;
-    Context.getInjectedTemplateArgs(Params, TemplateArgs);
-    First->InjectedArgs = new (Context) TemplateArgument[TemplateArgs.size()];
-    std::copy(TemplateArgs.begin(), TemplateArgs.end(), First->InjectedArgs);
-  }
-  return llvm::ArrayRef(First->InjectedArgs, Params->size());
-}
-
 //===----------------------------------------------------------------------===//
 // FriendTemplateDecl Implementation
 //===----------------------------------------------------------------------===//
@@ -1548,20 +1526,6 @@ SourceRange VarTemplatePartialSpecializationDecl::getSourceRange() const {
   return Range;
 }
 
-ArrayRef<TemplateArgument>
-VarTemplatePartialSpecializationDecl::getInjectedTemplateArgs() {
-  TemplateParameterList *Params = getTemplateParameters();
-  auto *First = cast<VarTemplatePartialSpecializationDecl>(getFirstDecl());
-  if (!First->InjectedArgs) {
-    auto &Context = getASTContext();
-    SmallVector<TemplateArgument, 16> TemplateArgs;
-    Context.getInjectedTemplateArgs(Params, TemplateArgs);
-    First->InjectedArgs = new (Context) TemplateArgument[TemplateArgs.size()];
-    std::copy(TemplateArgs.begin(), TemplateArgs.end(), First->InjectedArgs);
-  }
-  return llvm::ArrayRef(First->InjectedArgs, Params->size());
-}
-
 static TemplateParameterList *
 createMakeIntegerSeqParameterList(const ASTContext &C, DeclContext *DC) {
   // typename T
diff --git a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
index b0bd8274405d02e..da5dda063344f97 100644
--- a/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Models/UncheckedOptionalAccessModel.cpp
@@ -338,6 +338,11 @@ auto isZeroParamConstMemberCall() {
       callee(cxxMethodDecl(parameterCountIs(0), isConst())));
 }
 
+auto isZeroParamConstMemberOperatorCall() {
+  return cxxOperatorCallExpr(
+      callee(cxxMethodDecl(parameterCountIs(0), isConst())));
+}
+
 auto isNonConstMemberCall() {
   return cxxMemberCallExpr(callee(cxxMethodDecl(unless(isConst()))));
 }
@@ -572,9 +577,10 @@ void handleConstMemberCall(const CallExpr *CE,
     return;
   }
 
-  // Cache if the const method returns a boolean type.
+  // Cache if the const method returns a boolean or pointer type.
   // We may decide to cache other return types in the future.
-  if (RecordLoc != nullptr && CE->getType()->isBooleanType()) {
+  if (RecordLoc != nullptr &&
+      (CE->getType()->isBooleanType() || CE->getType()->isPointerType())) {
     Value *Val = State.Lattice.getOrCreateConstMethodReturnValue(*RecordLoc, CE,
                                                                  State.Env);
     if (Val == nullptr)
@@ -597,14 +603,26 @@ void transferValue_ConstMemberCall(const CXXMemberCallExpr *MCE,
       MCE, dataflow::getImplicitObjectLocation(*MCE, State.Env), Result, State);
 }
 
+void transferValue_ConstMemberOperatorCall(
+    const CXXOperatorCallExpr *OCE, const MatchFinder::MatchResult &Result,
+    LatticeTransferState &State) {
+  auto *RecordLoc = cast_or_null<dataflow::RecordStorageLocation>(
+      State.Env.getStorageLocation(*OCE->getArg(0)));
+  handleConstMemberCall(OCE, RecordLoc, Result, State);
+}
+
 void handleNonConstMemberCall(const CallExpr *CE,
                               dataflow::RecordStorageLocation *RecordLoc,
                               const MatchFinder::MatchResult &Result,
                               LatticeTransferState &State) {
-  // When a non-const member function is called, reset some state.
   if (RecordLoc != nullptr) {
+    // When a non-const member function is called, clear all (non-const)
+    // optional fields of the receiver. Const-qualified fields can't be
+    // changed (at least, not without UB).
     for (const auto &[Field, FieldLoc] : RecordLoc->children()) {
-      if (isSupportedOptionalType(Field->getType())) {
+      QualType FieldType = Field->getType();
+      if (!FieldType.isConstQualified() &&
+          isSupportedOptionalType(Field->getType())) {
         auto *FieldRecordLoc = cast_or_null<RecordStorageLocation>(FieldLoc);
         if (FieldRecordLoc) {
           setHasValue(*FieldRecordLoc, State.Env.makeAtomicBoolValue(),
@@ -1016,6 +1034,8 @@ auto buildTransferMatchSwitch() {
       // const accessor calls
       .CaseOfCFGStmt<CXXMemberCallExpr>(isZeroParamConstMemberCall(),
                                         transferValue_ConstMemberCall)
+      .CaseOfCFGStmt<CXXOperatorCallExpr>(isZeroParamConstMemberOperatorCall(),
+                                          transferValue_ConstMemberOperatorCall)
       // non-const member calls that may modify the state of an object.
       .CaseOfCFGStmt<CXXMemberCallExpr>(isNonConstMemberCall(),
                                         transferValue_NonConstMemberCall)
diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp
index da3216ae03af2e6..94caf6a3897bc1c 100644
--- a/clang/lib/Basic/LangOptions.cpp
+++ b/clang/lib/Basic/LangOptions.cpp
@@ -203,8 +203,6 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang,
     Opts.setDefaultFPContractMode(LangOptions::FPM_Fast);
   }
 
-  Opts.RenderScript = Lang == Language::RenderScript;
-
   // OpenCL, C++ and C23 have bool, true, false keywords.
   Opts.Bool = Opts.OpenCL || Opts.CPlusPlus || Opts.C23;
 
diff --git a/clang/lib/Basic/LangStandards.cpp b/clang/lib/Basic/LangStandards.cpp
index 214567a53efe95a..c49d095018b2002 100644
--- a/clang/lib/Basic/LangStandards.cpp
+++ b/clang/lib/Basic/LangStandards.cpp
@@ -37,8 +37,6 @@ StringRef clang::languageToString(Language L) {
     return "OpenCLC++";
   case Language::CUDA:
     return "CUDA";
-  case Language::RenderScript:
-    return "RenderScript";
   case Language::HIP:
     return "HIP";
   case Language::HLSL:
@@ -114,8 +112,6 @@ LangStandard::Kind clang::getDefaultLanguageStandard(clang::Language Lang,
   case Language::CUDA:
   case Language::HIP:
     return LangStandard::lang_gnucxx17;
-  case Language::RenderScript:
-    return LangStandard::lang_c99;
   case Language::HLSL:
     return LangStandard::lang_hlsl202x;
   }
diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp
index ad52fccff5dc7ff..330108d5b3e47f6 100644
--- a/clang/lib/Basic/Module.cpp
+++ b/clang/lib/Basic/Module.cpp
@@ -54,7 +54,6 @@ Module::Module(ModuleConstructorTag, StringRef Name,
     NoUndeclaredIncludes = Parent->NoUndeclaredIncludes;
     ModuleMapIsPrivate = Parent->ModuleMapIsPrivate;
 
-    Parent->SubModuleIndex[Name] = Parent->SubModules.size();
     Parent->SubModules.push_back(this);
   }
 }
@@ -351,11 +350,14 @@ void Module::markUnavailable(bool Unimportable) {
 }
 
 Module *Module::findSubmodule(StringRef Name) const {
-  llvm::StringMap<unsigned>::const_iterator Pos = SubModuleIndex.find(Name);
-  if (Pos == SubModuleIndex.end())
-    return nullptr;
+  // Add new submodules into the index.
+  for (unsigned I = SubModuleIndex.size(), E = SubModules.size(); I != E; ++I)
+    SubModuleIndex[SubModules[I]->Name] = I;
 
-  return SubModules[Pos->getValue()];
+  if (auto It = SubModuleIndex.find(Name); It != SubModuleIndex.end())
+    return SubModules[It->second];
+
+  return nullptr;
 }
 
 Module *Module::getGlobalModuleFragment() const {
@@ -528,7 +530,7 @@ void Module::print(raw_ostream &OS, unsigned Indent, bool Dump) const {
 
   for (auto &K : Kinds) {
     assert(&K == &Kinds[K.Kind] && "kinds in wrong order");
-    for (auto &H : Headers[K.Kind]) {
+    for (auto &H : getHeaders(K.Kind)) {
       OS.indent(Indent + 2);
       OS << K.Prefix << "header \"";
       OS.write_escaped(H.NameAsWritten);
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 145ca545854da7d..86befb1cbc74fc8 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -154,7 +154,6 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
   SSERegParmMax = 0;
   HasAlignMac68kSupport = false;
   HasBuiltinMSVaList = false;
-  IsRenderScriptTarget = false;
   HasAArch64SVETypes = false;
   HasRISCVVTypes = false;
   AllowAMDGPUUnsafeFPAtomics = false;
diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
index 4917ef015941be4..0021d33c45d7c9b 100644
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@@ -710,12 +710,6 @@ std::unique_ptr<TargetInfo> AllocateTarget(const llvm::Triple &Triple,
 
   case llvm::Triple::dxil:
     return std::make_unique<DirectXTargetInfo>(Triple, Opts);
-  case llvm::Triple::renderscript32:
-    return std::make_unique<LinuxTargetInfo<RenderScript32TargetInfo>>(Triple,
-                                                                       Opts);
-  case llvm::Triple::renderscript64:
-    return std::make_unique<LinuxTargetInfo<RenderScript64TargetInfo>>(Triple,
-                                                                       Opts);
 
   case llvm::Triple::ve:
     return std::make_unique<LinuxTargetInfo<VETargetInfo>>(Triple, Opts);
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 3dbba2b4d25bd6c..3d8de0294d4ba33 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -765,8 +765,6 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
       .Case("i8mm", HasMatMul)
       .Case("bf16", HasBFloat16)
       .Case("sve", FPU & SveMode)
-      .Case("sve-bf16", FPU & SveMode && HasBFloat16)
-      .Case("sve-i8mm", FPU & SveMode && HasMatMul)
       .Case("sve-b16b16", HasSVEB16B16)
       .Case("f32mm", FPU & SveMode && HasMatmulFP32)
       .Case("f64mm", FPU & SveMode && HasMatmulFP64)
@@ -1723,19 +1721,3 @@ TargetInfo::BuiltinVaListKind
 DarwinAArch64TargetInfo::getBuiltinVaListKind() const {
   return TargetInfo::CharPtrBuiltinVaList;
 }
-
-// 64-bit RenderScript is aarch64
-RenderScript64TargetInfo::RenderScript64TargetInfo(const llvm::Triple &Triple,
-                                                   const TargetOptions &Opts)
-    : AArch64leTargetInfo(llvm::Triple("aarch64", Triple.getVendorName(),
-                                       Triple.getOSName(),
-                                       Triple.getEnvironmentName()),
-                          Opts) {
-  IsRenderScriptTarget = true;
-}
-
-void RenderScript64TargetInfo::getTargetDefines(const LangOptions &Opts,
-                                                MacroBuilder &Builder) const {
-  Builder.defineMacro("__RENDERSCRIPT__");
-  AArch64leTargetInfo::getTargetDefines(Opts, Builder);
-}
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 16a02e102e045d6..ea3e4015d842653 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -319,17 +319,6 @@ class LLVM_LIBRARY_VISIBILITY DarwinAArch64TargetInfo
                     MacroBuilder &Builder) const override;
 };
 
-// 64-bit RenderScript is aarch64
-class LLVM_LIBRARY_VISIBILITY RenderScript64TargetInfo
-    : public AArch64leTargetInfo {
-public:
-  RenderScript64TargetInfo(const llvm::Triple &Triple,
-                           const TargetOptions &Opts);
-
-  void getTargetDefines(const LangOptions &Opts,
-                        MacroBuilder &Builder) const override;
-};
-
 } // namespace targets
 } // namespace clang
 
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index c87300bf2d60e04..370444057b42981 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -1498,19 +1498,3 @@ void DarwinARMTargetInfo::getOSDefines(const LangOptions &Opts,
                                        MacroBuilder &Builder) const {
   getDarwinDefines(Builder, Opts, Triple, PlatformName, PlatformMinVersion);
 }
-
-RenderScript32TargetInfo::RenderScript32TargetInfo(const llvm::Triple &Triple,
-                                                   const TargetOptions &Opts)
-    : ARMleTargetInfo(llvm::Triple("armv7", Triple.getVendorName(),
-                                   Triple.getOSName(),
-                                   Triple.getEnvironmentName()),
-                      Opts) {
-  IsRenderScriptTarget = true;
-  LongWidth = LongAlign = 64;
-}
-
-void RenderScript32TargetInfo::getTargetDefines(const LangOptions &Opts,
-                                                MacroBuilder &Builder) const {
-  Builder.defineMacro("__RENDERSCRIPT__");
-  ARMleTargetInfo::getTargetDefines(Opts, Builder);
-}
diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h
index df9855a52e61c0f..55ecb99d82d8fb1 100644
--- a/clang/lib/Basic/Targets/ARM.h
+++ b/clang/lib/Basic/Targets/ARM.h
@@ -310,17 +310,6 @@ class LLVM_LIBRARY_VISIBILITY DarwinARMTargetInfo
   DarwinARMTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts);
 };
 
-// 32-bit RenderScript is armv7 with width and align of 'long' set to 8-bytes
-class LLVM_LIBRARY_VISIBILITY RenderScript32TargetInfo
-    : public ARMleTargetInfo {
-public:
-  RenderScript32TargetInfo(const llvm::Triple &Triple,
-                           const TargetOptions &Opts);
-
-  void getTargetDefines(const LangOptions &Opts,
-                        MacroBuilder &Builder) const override;
-};
-
 } // namespace targets
 } // namespace clang
 
diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h
index bf40edb8683b3e7..3b418585ab4a399 100644
--- a/clang/lib/Basic/Targets/RISCV.h
+++ b/clang/lib/Basic/Targets/RISCV.h
@@ -141,6 +141,13 @@ class RISCVTargetInfo : public TargetInfo {
     return true;
   }
 
+  bool
+  checkCFProtectionReturnSupported(DiagnosticsEngine &Diags) const override {
+    if (ISAInfo->hasExtension("zicfiss"))
+      return true;
+    return TargetInfo::checkCFProtectionReturnSupported(Diags);
+  }
+
   CFBranchLabelSchemeKind getDefaultCFBranchLabelScheme() const override {
     return CFBranchLabelSchemeKind::FuncSig;
   }
diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp
index 4c9df6007b78231..0b380bdf835ffbd 100644
--- a/clang/lib/Basic/Targets/WebAssembly.cpp
+++ b/clang/lib/Basic/Targets/WebAssembly.cpp
@@ -154,20 +154,20 @@ bool WebAssemblyTargetInfo::initFeatureMap(
     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
     const std::vector<std::string> &FeaturesVec) const {
   auto addGenericFeatures = [&]() {
+    Features["bulk-memory"] = true;
     Features["multivalue"] = true;
     Features["mutable-globals"] = true;
+    Features["nontrapping-fptoint"] = true;
     Features["reference-types"] = true;
     Features["sign-ext"] = true;
   };
   auto addBleedingEdgeFeatures = [&]() {
     addGenericFeatures();
     Features["atomics"] = true;
-    Features["bulk-memory"] = true;
     Features["exception-handling"] = true;
     Features["extended-const"] = true;
     Features["fp16"] = true;
     Features["multimemory"] = true;
-    Features["nontrapping-fptoint"] = true;
     Features["tail-call"] = true;
     Features["wide-arithmetic"] = true;
     setSIMDLevel(Features, RelaxedSIMD, true);
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index d067ec218b52708..700c2f9a5dbd18d 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -1465,7 +1465,7 @@ bool X86TargetInfo::validateAsmConstraint(
     }
   case 'f': // Any x87 floating point stack register.
     // Constraint 'f' cannot be used for output operands.
-    if (Info.ConstraintStr[0] == '=')
+    if (Info.ConstraintStr[0] == '=' || Info.ConstraintStr[0] == '+')
       return false;
     Info.setAllowsRegister();
     return true;
diff --git a/clang/lib/CodeGen/ABIInfoImpl.cpp b/clang/lib/CodeGen/ABIInfoImpl.cpp
index be91b85e3a816f8..79300df15d0e29f 100644
--- a/clang/lib/CodeGen/ABIInfoImpl.cpp
+++ b/clang/lib/CodeGen/ABIInfoImpl.cpp
@@ -80,16 +80,6 @@ RValue DefaultABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
       Slot);
 }
 
-ABIArgInfo CodeGen::coerceToIntArray(QualType Ty, ASTContext &Context,
-                                     llvm::LLVMContext &LLVMContext) {
-  // Alignment and Size are measured in bits.
-  const uint64_t Size = Context.getTypeSize(Ty);
-  const uint64_t Alignment = Context.getTypeAlign(Ty);
-  llvm::Type *IntType = llvm::Type::getIntNTy(LLVMContext, Alignment);
-  const uint64_t NumElements = (Size + Alignment - 1) / Alignment;
-  return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
-}
-
 void CodeGen::AssignToArrayRange(CodeGen::CGBuilderTy &Builder,
                                  llvm::Value *Array, llvm::Value *Value,
                                  unsigned FirstIndex, unsigned LastIndex) {
diff --git a/clang/lib/CodeGen/ABIInfoImpl.h b/clang/lib/CodeGen/ABIInfoImpl.h
index 2a3ef6b8a6c9610..d9d79c6a55ddb16 100644
--- a/clang/lib/CodeGen/ABIInfoImpl.h
+++ b/clang/lib/CodeGen/ABIInfoImpl.h
@@ -33,23 +33,6 @@ class DefaultABIInfo : public ABIInfo {
                    AggValueSlot Slot) const override;
 };
 
-// Helper for coercing an aggregate argument or return value into an integer
-// array of the same size (including padding) and alignment.  This alternate
-// coercion happens only for the RenderScript ABI and can be removed after
-// runtimes that rely on it are no longer supported.
-//
-// RenderScript assumes that the size of the argument / return value in the IR
-// is the same as the size of the corresponding qualified type. This helper
-// coerces the aggregate type into an array of the same size (including
-// padding).  This coercion is used in lieu of expansion of struct members or
-// other canonical coercions that return a coerced-type of larger size.
-//
-// Ty          - The argument / return value type
-// Context     - The associated ASTContext
-// LLVMContext - The associated LLVMContext
-ABIArgInfo coerceToIntArray(QualType Ty, ASTContext &Context,
-                            llvm::LLVMContext &LLVMContext);
-
 void AssignToArrayRange(CodeGen::CGBuilderTy &Builder, llvm::Value *Array,
                         llvm::Value *Value, unsigned FirstIndex,
                         unsigned LastIndex);
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index e2d03eff8ab4a0f..65d7f5c54a1913e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17,6 +17,7 @@
 #include "CGObjCRuntime.h"
 #include "CGOpenCLRuntime.h"
 #include "CGRecordLayout.h"
+#include "CGValue.h"
 #include "CodeGenFunction.h"
 #include "CodeGenModule.h"
 #include "ConstantEmitter.h"
@@ -25,8 +26,10 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/Expr.h"
 #include "clang/AST/OSLog.h"
 #include "clang/AST/OperationKinds.h"
+#include "clang/AST/Type.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
@@ -67,6 +70,7 @@
 #include "llvm/TargetParser/X86TargetParser.h"
 #include <optional>
 #include <sstream>
+#include <utility>
 
 using namespace clang;
 using namespace CodeGen;
@@ -95,6 +99,76 @@ static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size,
   I->addAnnotationMetadata("auto-init");
 }
 
+/// Lower the HLSL splitdouble builtin: store the low and high 32-bit halves of
+/// argument 0 through the two out arguments; returns the high-half store.
+static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
+  Value *Op0 = CGF->EmitScalarExpr(E->getArg(0));
+  // Both out arguments are dereferenced unconditionally below, so use cast<>
+  // (asserts on a kind mismatch) instead of dyn_cast<> (may yield null).
+  const auto *OutArg1 = cast<HLSLOutArgExpr>(E->getArg(1));
+  const auto *OutArg2 = cast<HLSLOutArgExpr>(E->getArg(2));
+
+  CallArgList Args;
+  LValue Op1TmpLValue =
+      CGF->EmitHLSLOutArgExpr(OutArg1, Args, OutArg1->getType());
+  LValue Op2TmpLValue =
+      CGF->EmitHLSLOutArgExpr(OutArg2, Args, OutArg2->getType());
+
+  if (CGF->getTarget().getCXXABI().areArgsDestroyedLeftToRightInCallee())
+    Args.reverseWritebacks();
+
+  Value *LowBits = nullptr;
+  Value *HighBits = nullptr;
+
+  if (CGF->CGM.getTarget().getTriple().isDXIL()) {
+    llvm::Type *RetElementTy = CGF->Int32Ty;
+    if (auto *Op0VecTy = E->getArg(0)->getType()->getAs<clang::VectorType>())
+      RetElementTy = llvm::VectorType::get(
+          CGF->Int32Ty, ElementCount::getFixed(Op0VecTy->getNumElements()));
+    auto *RetTy = llvm::StructType::get(RetElementTy, RetElementTy);
+
+    CallInst *CI = CGF->Builder.CreateIntrinsic(
+        RetTy, Intrinsic::dx_splitdouble, {Op0}, nullptr, "hlsl.splitdouble");
+    LowBits = CGF->Builder.CreateExtractValue(CI, 0);
+    HighBits = CGF->Builder.CreateExtractValue(CI, 1);
+  } else {
+    // For non-DXIL targets, bitcast to i32 lanes and pick each half directly.
+    if (!Op0->getType()->isVectorTy()) {
+      FixedVectorType *DestTy = FixedVectorType::get(CGF->Int32Ty, 2);
+      Value *Bitcast = CGF->Builder.CreateBitCast(Op0, DestTy);
+
+      LowBits = CGF->Builder.CreateExtractElement(Bitcast, (uint64_t)0);
+      HighBits = CGF->Builder.CreateExtractElement(Bitcast, 1);
+    } else {
+      int NumElements = 1;
+      if (const auto *VecTy =
+              E->getArg(0)->getType()->getAs<clang::VectorType>())
+        NumElements = VecTy->getNumElements();
+
+      FixedVectorType *Uint32VecTy =
+          FixedVectorType::get(CGF->Int32Ty, NumElements * 2);
+      Value *Uint32Vec = CGF->Builder.CreateBitCast(Op0, Uint32VecTy);
+      if (NumElements == 1) {
+        LowBits = CGF->Builder.CreateExtractElement(Uint32Vec, (uint64_t)0);
+        HighBits = CGF->Builder.CreateExtractElement(Uint32Vec, 1);
+      } else {
+        SmallVector<int> EvenMask, OddMask;
+        for (int I = 0; I != NumElements; ++I) {
+          EvenMask.push_back(I * 2);
+          OddMask.push_back(I * 2 + 1);
+        }
+        LowBits = CGF->Builder.CreateShuffleVector(Uint32Vec, EvenMask);
+        HighBits = CGF->Builder.CreateShuffleVector(Uint32Vec, OddMask);
+      }
+    }
+  }
+  CGF->Builder.CreateStore(LowBits, Op1TmpLValue.getAddress());
+  auto *LastInst =
+      CGF->Builder.CreateStore(HighBits, Op2TmpLValue.getAddress());
+  CGF->EmitWritebacks(Args);
+  return LastInst;
+}
+
 /// getBuiltinLibFunction - Given a builtin id for a function like
 /// "__builtin_fabsf", return a Function* for "fabsf".
 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
@@ -18959,6 +19033,14 @@ case Builtin::BI__builtin_hlsl_elementwise_isinf: {
         CGM.getHLSLRuntime().getRadiansIntrinsic(), ArrayRef<Value *>{Op0},
         nullptr, "hlsl.radians");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
+
+    assert((E->getArg(0)->getType()->hasFloatingRepresentation() &&
+            E->getArg(1)->getType()->hasUnsignedIntegerRepresentation() &&
+            E->getArg(2)->getType()->hasUnsignedIntegerRepresentation()) &&
+           "asuint operands types mismatch");
+    return handleHlslSplitdouble(E, this);
+  }
   }
   return nullptr;
 }
@@ -19055,7 +19137,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
       Args.push_back(llvm::PoisonValue::get(IntTy));
     for (unsigned I = 0; I != E->getNumArgs(); ++I) {
       llvm::Value *V = EmitScalarOrConstFoldImmArg(ICEArguments, I, E);
-      if (I <= !InsertOld && Size < 32) {
+      if (I <= (InsertOld ? 0u : 1u) && Size < 32) {
         if (!DataTy->isIntegerTy())
           V = Builder.CreateBitCast(
               V, llvm::IntegerType::get(Builder.getContext(), Size));
@@ -20492,8 +20574,8 @@ static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
 #undef MMA_VARIANTS_B1_XOR
 }
 
-static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
-                         const CallExpr *E) {
+static Value *MakeLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
+                      const CallExpr *E) {
   Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
   QualType ArgType = E->getArg(0)->getType();
   clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
@@ -20503,6 +20585,21 @@ static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
       {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
 }
 
+static Value *MakeLdg(CodeGenFunction &CGF, const CallExpr *E) {
+  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
+  QualType ArgType = E->getArg(0)->getType();
+  clang::CharUnits AlignV = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
+  llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
+
+  // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
+  auto *ASC = CGF.Builder.CreateAddrSpaceCast(Ptr, CGF.Builder.getPtrTy(1));
+  auto *LD = CGF.Builder.CreateAlignedLoad(ElemTy, ASC, AlignV.getAsAlign());
+  MDNode *MD = MDNode::get(CGF.Builder.getContext(), {});
+  LD->setMetadata(LLVMContext::MD_invariant_load, MD);
+
+  return LD;
+}
+
 static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
                                const CallExpr *E) {
   Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
@@ -20536,9 +20633,11 @@ static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
     return nullptr;
   }
 
-  if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
-      IntrinsicID == Intrinsic::nvvm_ldu_global_f)
-    return MakeLdgLdu(IntrinsicID, CGF, E);
+  if (BuiltinID == NVPTX::BI__nvvm_ldg_h || BuiltinID == NVPTX::BI__nvvm_ldg_h2)
+    return MakeLdg(CGF, E);
+
+  if (IntrinsicID == Intrinsic::nvvm_ldu_global_f)
+    return MakeLdu(IntrinsicID, CGF, E);
 
   SmallVector<Value *, 16> Args;
   auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
@@ -20675,16 +20774,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
   case NVPTX::BI__nvvm_ldg_ul2:
   case NVPTX::BI__nvvm_ldg_ull:
   case NVPTX::BI__nvvm_ldg_ull2:
-    // PTX Interoperability section 2.2: "For a vector with an even number of
-    // elements, its alignment is set to number of elements times the alignment
-    // of its member: n*alignof(t)."
-    return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
   case NVPTX::BI__nvvm_ldg_f:
   case NVPTX::BI__nvvm_ldg_f2:
   case NVPTX::BI__nvvm_ldg_f4:
   case NVPTX::BI__nvvm_ldg_d:
   case NVPTX::BI__nvvm_ldg_d2:
-    return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
+    // PTX Interoperability section 2.2: "For a vector with an even number of
+    // elements, its alignment is set to number of elements times the alignment
+    // of its member: n*alignof(t)."
+    return MakeLdg(*this, E);
 
   case NVPTX::BI__nvvm_ldu_c:
   case NVPTX::BI__nvvm_ldu_sc:
@@ -20715,13 +20813,13 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
   case NVPTX::BI__nvvm_ldu_ul2:
   case NVPTX::BI__nvvm_ldu_ull:
   case NVPTX::BI__nvvm_ldu_ull2:
-    return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
+    return MakeLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
   case NVPTX::BI__nvvm_ldu_f:
   case NVPTX::BI__nvvm_ldu_f2:
   case NVPTX::BI__nvvm_ldu_f4:
   case NVPTX::BI__nvvm_ldu_d:
   case NVPTX::BI__nvvm_ldu_d2:
-    return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
+    return MakeLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
 
   case NVPTX::BI__nvvm_atom_cta_add_gen_i:
   case NVPTX::BI__nvvm_atom_cta_add_gen_l:
@@ -21195,14 +21293,11 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
     return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
                         *this);
   case NVPTX::BI__nvvm_ldg_h:
-    return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
   case NVPTX::BI__nvvm_ldg_h2:
-    return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
+    return MakeHalfType(Intrinsic::not_intrinsic, BuiltinID, E, *this);
   case NVPTX::BI__nvvm_ldu_h:
+  case NVPTX::BI__nvvm_ldu_h2:
     return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
-  case NVPTX::BI__nvvm_ldu_h2: {
-    return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
-  }
   case NVPTX::BI__nvvm_cp_async_ca_shared_global_4:
     return MakeCpAsync(Intrinsic::nvvm_cp_async_ca_shared_global_4,
                        Intrinsic::nvvm_cp_async_ca_shared_global_4_s, *this, E,
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 1949b4ceb7f204c..8f4f5d3ed816012 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -40,6 +40,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Type.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <optional>
 using namespace clang;
@@ -1410,6 +1411,30 @@ static Address emitAddressAtOffset(CodeGenFunction &CGF, Address addr,
   return addr;
 }
 
+static std::pair<llvm::Value *, bool>
+CoerceScalableToFixed(CodeGenFunction &CGF, llvm::FixedVectorType *ToTy,
+                      llvm::ScalableVectorType *FromTy, llvm::Value *V,
+                      StringRef Name = "") {
+  // If we are casting a scalable i1 predicate vector to a fixed i8
+  // vector, first bitcast the source.
+  if (FromTy->getElementType()->isIntegerTy(1) &&
+      FromTy->getElementCount().isKnownMultipleOf(8) &&
+      ToTy->getElementType() == CGF.Builder.getInt8Ty()) {
+    FromTy = llvm::ScalableVectorType::get(
+        ToTy->getElementType(),
+        FromTy->getElementCount().getKnownMinValue() / 8);
+    V = CGF.Builder.CreateBitCast(V, FromTy);
+  }
+  if (FromTy->getElementType() == ToTy->getElementType()) {
+    llvm::Value *Zero = llvm::Constant::getNullValue(CGF.CGM.Int64Ty);
+
+    V->setName(Name + ".coerce");
+    V = CGF.Builder.CreateExtractVector(ToTy, V, Zero, "cast.fixed");
+    return {V, true};
+  }
+  return {V, false};
+}
+
 namespace {
 
 /// Encapsulates information about the way function arguments from
@@ -3196,26 +3221,14 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       // a VLAT at the function boundary and the types match up, use
       // llvm.vector.extract to convert back to the original VLST.
       if (auto *VecTyTo = dyn_cast<llvm::FixedVectorType>(ConvertType(Ty))) {
-        llvm::Value *Coerced = Fn->getArg(FirstIRArg);
+        llvm::Value *ArgVal = Fn->getArg(FirstIRArg);
         if (auto *VecTyFrom =
-                dyn_cast<llvm::ScalableVectorType>(Coerced->getType())) {
-          // If we are casting a scalable i1 predicate vector to a fixed i8
-          // vector, bitcast the source and use a vector extract.
-          if (VecTyFrom->getElementType()->isIntegerTy(1) &&
-              VecTyFrom->getElementCount().isKnownMultipleOf(8) &&
-              VecTyTo->getElementType() == Builder.getInt8Ty()) {
-            VecTyFrom = llvm::ScalableVectorType::get(
-                VecTyTo->getElementType(),
-                VecTyFrom->getElementCount().getKnownMinValue() / 8);
-            Coerced = Builder.CreateBitCast(Coerced, VecTyFrom);
-          }
-          if (VecTyFrom->getElementType() == VecTyTo->getElementType()) {
-            llvm::Value *Zero = llvm::Constant::getNullValue(CGM.Int64Ty);
-
+                dyn_cast<llvm::ScalableVectorType>(ArgVal->getType())) {
+          auto [Coerced, Extracted] = CoerceScalableToFixed(
+              *this, VecTyTo, VecTyFrom, ArgVal, Arg->getName());
+          if (Extracted) {
             assert(NumIRArgs == 1);
-            Coerced->setName(Arg->getName() + ".coerce");
-            ArgVals.push_back(ParamValue::forDirect(Builder.CreateExtractVector(
-                VecTyTo, Coerced, Zero, "cast.fixed")));
+            ArgVals.push_back(ParamValue::forDirect(Coerced));
             break;
           }
         }
@@ -3326,16 +3339,33 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       ArgVals.push_back(ParamValue::forIndirect(alloca));
 
       auto coercionType = ArgI.getCoerceAndExpandType();
+      auto unpaddedCoercionType = ArgI.getUnpaddedCoerceAndExpandType();
+      auto *unpaddedStruct = dyn_cast<llvm::StructType>(unpaddedCoercionType);
+
       alloca = alloca.withElementType(coercionType);
 
       unsigned argIndex = FirstIRArg;
+      unsigned unpaddedIndex = 0;
       for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
         llvm::Type *eltType = coercionType->getElementType(i);
         if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType))
           continue;
 
         auto eltAddr = Builder.CreateStructGEP(alloca, i);
-        auto elt = Fn->getArg(argIndex++);
+        llvm::Value *elt = Fn->getArg(argIndex++);
+
+        auto paramType = unpaddedStruct
+                             ? unpaddedStruct->getElementType(unpaddedIndex++)
+                             : unpaddedCoercionType;
+
+        if (auto *VecTyTo = dyn_cast<llvm::FixedVectorType>(eltType)) {
+          if (auto *VecTyFrom = dyn_cast<llvm::ScalableVectorType>(paramType)) {
+            bool Extracted;
+            std::tie(elt, Extracted) = CoerceScalableToFixed(
+                *this, VecTyTo, VecTyFrom, elt, elt->getName());
+            assert(Extracted && "Unexpected scalable to fixed vector coercion");
+          }
+        }
         Builder.CreateStore(elt, eltAddr);
       }
       assert(argIndex == FirstIRArg + NumIRArgs);
@@ -3930,17 +3960,24 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI,
 
   case ABIArgInfo::CoerceAndExpand: {
     auto coercionType = RetAI.getCoerceAndExpandType();
+    auto unpaddedCoercionType = RetAI.getUnpaddedCoerceAndExpandType();
+    auto *unpaddedStruct = dyn_cast<llvm::StructType>(unpaddedCoercionType);
 
     // Load all of the coerced elements out into results.
     llvm::SmallVector<llvm::Value*, 4> results;
     Address addr = ReturnValue.withElementType(coercionType);
+    unsigned unpaddedIndex = 0;
     for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
       auto coercedEltType = coercionType->getElementType(i);
       if (ABIArgInfo::isPaddingForCoerceAndExpand(coercedEltType))
         continue;
 
       auto eltAddr = Builder.CreateStructGEP(addr, i);
-      auto elt = Builder.CreateLoad(eltAddr);
+      llvm::Value *elt = CreateCoercedLoad(
+          eltAddr,
+          unpaddedStruct ? unpaddedStruct->getElementType(unpaddedIndex++)
+                         : unpaddedCoercionType,
+          *this);
       results.push_back(elt);
     }
 
@@ -4207,12 +4244,6 @@ static void emitWriteback(CodeGenFunction &CGF,
     CGF.EmitBlock(contBB);
 }
 
-static void emitWritebacks(CodeGenFunction &CGF,
-                           const CallArgList &args) {
-  for (const auto &I : args.writebacks())
-    emitWriteback(CGF, I);
-}
-
 static void deactivateArgCleanupsBeforeCall(CodeGenFunction &CGF,
                                             const CallArgList &CallArgs) {
   ArrayRef<CallArgList::CallArgCleanup> Cleanups =
@@ -4681,6 +4712,11 @@ void CallArg::copyInto(CodeGenFunction &CGF, Address Addr) const {
   IsUsed = true;
 }
 
+void CodeGenFunction::EmitWritebacks(const CallArgList &args) {
+  for (const auto &I : args.writebacks())
+    emitWriteback(*this, I);
+}
+
 void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E,
                                   QualType type) {
   DisableDebugLocationUpdates Dis(*this, E);
@@ -5472,6 +5508,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
     case ABIArgInfo::CoerceAndExpand: {
       auto coercionType = ArgInfo.getCoerceAndExpandType();
       auto layout = CGM.getDataLayout().getStructLayout(coercionType);
+      auto unpaddedCoercionType = ArgInfo.getUnpaddedCoerceAndExpandType();
+      auto *unpaddedStruct = dyn_cast<llvm::StructType>(unpaddedCoercionType);
 
       llvm::Value *tempSize = nullptr;
       Address addr = Address::invalid();
@@ -5502,11 +5540,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
       addr = addr.withElementType(coercionType);
 
       unsigned IRArgPos = FirstIRArg;
+      unsigned unpaddedIndex = 0;
       for (unsigned i = 0, e = coercionType->getNumElements(); i != e; ++i) {
         llvm::Type *eltType = coercionType->getElementType(i);
         if (ABIArgInfo::isPaddingForCoerceAndExpand(eltType)) continue;
         Address eltAddr = Builder.CreateStructGEP(addr, i);
-        llvm::Value *elt = Builder.CreateLoad(eltAddr);
+        llvm::Value *elt = CreateCoercedLoad(
+            eltAddr,
+            unpaddedStruct ? unpaddedStruct->getElementType(unpaddedIndex++)
+                           : unpaddedCoercionType,
+            *this);
         if (ArgHasMaybeUndefAttr)
           elt = Builder.CreateFreeze(elt);
         IRCallArgs[IRArgPos++] = elt;
@@ -5897,7 +5940,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   // Emit any call-associated writebacks immediately.  Arguably this
   // should happen after any return-value munging.
   if (CallArgs.hasWritebacks())
-    emitWritebacks(*this, CallArgs);
+    EmitWritebacks(CallArgs);
 
   // The stack cleanup for inalloca arguments has to run out of the normal
   // lexical order, so deactivate it and run it manually here.
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 27bbbfc6f531a10..ad64abe7cd40a39 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -624,8 +624,6 @@ void CGDebugInfo::CreateCompileUnit() {
   } else if (LO.OpenCL && (!CGM.getCodeGenOpts().DebugStrictDwarf ||
                            CGM.getCodeGenOpts().DwarfVersion >= 5)) {
     LangTag = llvm::dwarf::DW_LANG_OpenCL;
-  } else if (LO.RenderScript) {
-    LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript;
   } else if (LO.C11 && !(CGO.DebugStrictDwarf && CGO.DwarfVersion < 5)) {
       LangTag = llvm::dwarf::DW_LANG_C11;
   } else if (LO.C99) {
@@ -783,6 +781,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
 #define SVE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
 #include "clang/Basic/AArch64SVEACLETypes.def"
     {
+      if (BT->getKind() == BuiltinType::MFloat8) {
+        Encoding = llvm::dwarf::DW_ATE_unsigned_char;
+        BTName = BT->getName(CGM.getLangOpts());
+        // Bit size of the type.
+        uint64_t Size = CGM.getContext().getTypeSize(BT);
+        return DBuilder.createBasicType(BTName, Size, Encoding);
+      }
       ASTContext::BuiltinVectorTypeInfo Info =
           // For svcount_t, only the lower 2 bytes are relevant.
           BT->getKind() == BuiltinType::SveCount
@@ -909,6 +914,13 @@ llvm::DIType *CGDebugInfo::CreateType(const BuiltinType *BT) {
                                      TheCU, TheCU->getFile(), 0);              \
     return SingletonId;                                                        \
   }
+#define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope)  \
+  case BuiltinType::Id: {                                                      \
+    if (!SingletonId)                                                          \
+      SingletonId =                                                            \
+          DBuilder.createBasicType(Name, Width, llvm::dwarf::DW_ATE_unsigned); \
+    return SingletonId;                                                        \
+  }
 #include "clang/Basic/AMDGPUTypes.def"
   case BuiltinType::UChar:
   case BuiltinType::Char_U:
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index e0ea65bcaf36372..e90e8da3e9f1ea1 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5460,9 +5460,8 @@ LValue CodeGenFunction::EmitOpaqueValueLValue(const OpaqueValueExpr *e) {
   return getOrCreateOpaqueLValueMapping(e);
 }
 
-void CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E,
-                                         CallArgList &Args, QualType Ty) {
-
+std::pair<LValue, LValue>
+CodeGenFunction::EmitHLSLOutArgLValues(const HLSLOutArgExpr *E, QualType Ty) {
   // Emitting the casted temporary through an opaque value.
   LValue BaseLV = EmitLValue(E->getArgLValue());
   OpaqueValueMappingData::bind(*this, E->getOpaqueArgLValue(), BaseLV);
@@ -5476,6 +5475,13 @@ void CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E,
                                TempLV);
 
   OpaqueValueMappingData::bind(*this, E->getCastedTemporary(), TempLV);
+  return std::make_pair(BaseLV, TempLV);
+}
+
+LValue CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E,
+                                           CallArgList &Args, QualType Ty) {
+
+  auto [BaseLV, TempLV] = EmitHLSLOutArgLValues(E, Ty);
 
   llvm::Value *Addr = TempLV.getAddress().getBasePointer();
   llvm::Type *ElTy = ConvertTypeForMem(TempLV.getType());
@@ -5488,6 +5494,7 @@ void CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E,
   Args.addWriteback(BaseLV, TmpAddr, nullptr, E->getWritebackCast(),
                     LifetimeSize);
   Args.add(RValue::get(TmpAddr, *this), Ty);
+  return TempLV;
 }
 
 LValue
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 2cce2936fe5aeef..06558ce796f2e49 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -404,6 +404,16 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
   BasicBlock *BB = BasicBlock::Create(Ctx, "entry", EntryFn);
   IRBuilder<> B(BB);
   llvm::SmallVector<Value *> Args;
+
+  SmallVector<OperandBundleDef, 1> OB;
+  if (CGM.shouldEmitConvergenceTokens()) {
+    assert(EntryFn->isConvergent());
+    llvm::Value *I = B.CreateIntrinsic(
+        llvm::Intrinsic::experimental_convergence_entry, {}, {});
+    llvm::Value *bundleArgs[] = {I};
+    OB.emplace_back("convergencectrl", bundleArgs);
+  }
+
   // FIXME: support struct parameters where semantics are on members.
   // See: https://github.com/llvm/llvm-project/issues/57874
   unsigned SRetOffset = 0;
@@ -419,7 +429,7 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl *FD,
     Args.push_back(emitInputSemantic(B, *PD, Param.getType()));
   }
 
-  CallInst *CI = B.CreateCall(FunctionCallee(Fn), Args);
+  CallInst *CI = B.CreateCall(FunctionCallee(Fn), Args, OB);
   CI->setCallingConv(Fn->getCallingConv());
   // FIXME: Handle codegen for return type semantics.
   // See: https://github.com/llvm/llvm-project/issues/57875
@@ -474,14 +484,22 @@ void CGHLSLRuntime::generateGlobalCtorDtorCalls() {
   for (auto &F : M.functions()) {
     if (!F.hasFnAttribute("hlsl.shader"))
       continue;
-    IRBuilder<> B(&F.getEntryBlock(), F.getEntryBlock().begin());
+    auto *Token = getConvergenceToken(F.getEntryBlock());
+    Instruction *IP = &*F.getEntryBlock().begin();
+    SmallVector<OperandBundleDef, 1> OB;
+    if (Token) {
+      llvm::Value *bundleArgs[] = {Token};
+      OB.emplace_back("convergencectrl", bundleArgs);
+      IP = Token->getNextNode();
+    }
+    IRBuilder<> B(IP);
     for (auto *Fn : CtorFns)
-      B.CreateCall(FunctionCallee(Fn));
+      B.CreateCall(FunctionCallee(Fn), {}, OB);
 
     // Insert global dtors before the terminator of the last instruction
     B.SetInsertPoint(F.back().getTerminator());
     for (auto *Fn : DtorFns)
-      B.CreateCall(FunctionCallee(Fn));
+      B.CreateCall(FunctionCallee(Fn), {}, OB);
   }
 
   // No need to keep global ctors/dtors for non-lib profile after call to
@@ -579,3 +597,18 @@ llvm::Function *CGHLSLRuntime::createResourceBindingInitFn() {
   Builder.CreateRetVoid();
   return InitResBindingsFunc;
 }
+
+llvm::Instruction *CGHLSLRuntime::getConvergenceToken(BasicBlock &BB) {
+  if (!CGM.shouldEmitConvergenceTokens())
+    return nullptr;
+
+  auto E = BB.end();
+  for (auto I = BB.begin(); I != E; ++I) {
+    auto *II = dyn_cast<llvm::IntrinsicInst>(&*I);
+    if (II && llvm::isConvergenceControlIntrinsic(II->getIntrinsicID())) {
+      return II;
+    }
+  }
+  llvm_unreachable("Convergence token should have been emitted.");
+  return nullptr;
+}
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h
index ff7df41b5c62e71..cd533cad84e9fbe 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.h
+++ b/clang/lib/CodeGen/CGHLSLRuntime.h
@@ -143,6 +143,7 @@ class CGHLSLRuntime {
 
   bool needsResourceBindingInitFn();
   llvm::Function *createResourceBindingInitFn();
+  llvm::Instruction *getConvergenceToken(llvm::BasicBlock &BB);
 
 private:
   void addBufferResourceAnnotation(llvm::GlobalVariable *GV,
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c0931e82d9875a9..d714af035d21a2a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1192,6 +1192,7 @@ struct PushAndPopStackRAII {
       CodeGenFunction::JumpDest Dest =
           CGF.getOMPCancelDestination(OMPD_parallel);
       CGF.EmitBranchThroughCleanup(Dest);
+      return llvm::Error::success();
     };
 
     // TODO: Remove this once we emit parallel regions through the
@@ -2331,8 +2332,11 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
   auto *OMPRegionInfo =
       dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
   if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
-    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
-        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+        OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
+                                 EmitChecks);
+    assert(AfterIP && "unexpected error creating barrier");
+    CGF.Builder.restoreIP(*AfterIP);
     return;
   }
 
@@ -5928,8 +5932,10 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
         return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
       };
 
-  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
-                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);
+  llvm::Error Err = OMPBuilder.emitTargetRegionFunction(
+      EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
+      OutlinedFnID);
+  assert(!Err && "unexpected error creating target region");
 
   if (!OutlinedFn)
     return;
@@ -9670,9 +9676,12 @@ static void emitTargetCallKernelLaunch(
         NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
         DynCGGroupMem, HasNoWait);
 
-    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
-        CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
-        RTLoc, AllocaIP));
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+        OMPRuntime->getOMPBuilder().emitKernelLaunch(
+            CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
+            RTLoc, AllocaIP);
+    assert(AfterIP && "unexpected error creating kernel launch");
+    CGF.Builder.restoreIP(*AfterIP);
   };
 
   if (RequiresOuterTask)
@@ -10349,9 +10358,12 @@ void CGOpenMPRuntime::emitTargetDataCalls(
   InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                           CGF.Builder.GetInsertPoint());
   llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
-  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
-      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
-      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createTargetData(
+          OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
+          /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc);
+  assert(AfterIP && "unexpected error creating target data");
+  CGF.Builder.restoreIP(*AfterIP);
 }
 
 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index c66d5d11b0bbfa3..598b946ad88dbbf 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1753,11 +1753,14 @@ void CGOpenMPRuntimeGPU::emitReduction(
     Idx++;
   }
 
-  CGF.Builder.restoreIP(OMPBuilder.createReductionsGPU(
-      OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction,
-      DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,
-      CGF.getTarget().getGridValue(), C.getLangOpts().OpenMPCUDAReductionBufNum,
-      RTLoc));
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createReductionsGPU(
+          OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, false, TeamsReduction,
+          DistributeReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang,
+          CGF.getTarget().getGridValue(),
+          C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc);
+  assert(AfterIP && "unexpected error creating GPU reductions");
+  CGF.Builder.restoreIP(*AfterIP);
   return;
 }
 
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 0da7855ab05c6cc..1c32a675380c7f1 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1809,6 +1809,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
     // thus calls destructors etc.
     auto FiniCB = [this](InsertPointTy IP) {
       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+      return llvm::Error::success();
     };
 
     // Privatization callback that performs appropriate action for
@@ -1831,15 +1832,18 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
                                InsertPointTy CodeGenIP) {
       OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
           *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
+      return llvm::Error::success();
     };
 
     CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
     llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
         AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
-    Builder.restoreIP(
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
         OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
-                                  IfCond, NumThreads, ProcBind, S.hasCancel()));
+                                  IfCond, NumThreads, ProcBind, S.hasCancel());
+    assert(AfterIP && "unexpected error creating parallel");
+    Builder.restoreIP(*AfterIP);
     return;
   }
 
@@ -2128,9 +2132,13 @@ void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
 
     RunCleanupsScope BodyScope(*this);
     EmitStmt(BodyStmt);
+    return llvm::Error::success();
   };
-  llvm::CanonicalLoopInfo *CL =
+
+  llvm::Expected<llvm::CanonicalLoopInfo *> Result =
       OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
+  assert(Result && "unexpected error creating canonical loop");
+  llvm::CanonicalLoopInfo *CL = *Result;
 
   // Finish up the loop.
   Builder.restoreIP(CL->getAfterIP());
@@ -4016,11 +4024,13 @@ static void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF,
           CGM.getOpenMPRuntime().getOMPBuilder();
       llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
           CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
-      OMPBuilder.applyWorkshareLoop(
-          CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
-          SchedKind, ChunkSize, /*HasSimdModifier=*/false,
-          /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
-          /*HasOrderedClause=*/false);
+      llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+          OMPBuilder.applyWorkshareLoop(
+              CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP,
+              NeedsBarrier, SchedKind, ChunkSize, /*HasSimdModifier=*/false,
+              /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
+              /*HasOrderedClause=*/false);
+      assert(AfterIP && "unexpected error creating workshare loop");
       return;
     }
 
@@ -4257,6 +4267,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
 
     auto FiniCB = [this](InsertPointTy IP) {
       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+      return llvm::Error::success();
     };
 
     const CapturedStmt *ICS = S.getInnermostCapturedStmt();
@@ -4269,6 +4280,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
                                          InsertPointTy CodeGenIP) {
           OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
               *this, SubStmt, AllocaIP, CodeGenIP, "section");
+          return llvm::Error::success();
         };
         SectionCBVector.push_back(SectionCB);
       }
@@ -4277,6 +4289,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
                                             InsertPointTy CodeGenIP) {
         OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
             *this, CapturedStmt, AllocaIP, CodeGenIP, "section");
+        return llvm::Error::success();
       };
       SectionCBVector.push_back(SectionCB);
     }
@@ -4298,9 +4311,12 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
     CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
     llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
         AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
-    Builder.restoreIP(OMPBuilder.createSections(
-        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
-        S.getSingleClause<OMPNowaitClause>()));
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+        OMPBuilder.createSections(Builder, AllocaIP, SectionCBVector, PrivCB,
+                                  FiniCB, S.hasCancel(),
+                                  S.getSingleClause<OMPNowaitClause>());
+    assert(AfterIP && "unexpected error creating sections");
+    Builder.restoreIP(*AfterIP);
     return;
   }
   {
@@ -4326,17 +4342,22 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
     const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
     auto FiniCB = [this](InsertPointTy IP) {
       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+      return llvm::Error::success();
     };
 
     auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP) {
       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
           *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
+      return llvm::Error::success();
     };
 
     LexicalScope Scope(*this, S.getSourceRange());
     EmitStopPoint(&S);
-    Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+        OMPBuilder.createSection(Builder, BodyGenCB, FiniCB);
+    assert(AfterIP && "unexpected error creating section");
+    Builder.restoreIP(*AfterIP);
 
     return;
   }
@@ -4407,17 +4428,22 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
 
     auto FiniCB = [this](InsertPointTy IP) {
       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+      return llvm::Error::success();
     };
 
     auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                   InsertPointTy CodeGenIP) {
       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
           *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
+      return llvm::Error::success();
     };
 
     LexicalScope Scope(*this, S.getSourceRange());
     EmitStopPoint(&S);
-    Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+        OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB);
+    assert(AfterIP && "unexpected error creating master");
+    Builder.restoreIP(*AfterIP);
 
     return;
   }
@@ -4453,18 +4479,22 @@ void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
 
     auto FiniCB = [this](InsertPointTy IP) {
       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+      return llvm::Error::success();
     };
 
     auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                   InsertPointTy CodeGenIP) {
       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
           *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
+      return llvm::Error::success();
     };
 
     LexicalScope Scope(*this, S.getSourceRange());
     EmitStopPoint(&S);
-    Builder.restoreIP(
-        OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+        OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal);
+    assert(AfterIP && "unexpected error creating masked");
+    Builder.restoreIP(*AfterIP);
 
     return;
   }
@@ -4493,19 +4523,23 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
 
     auto FiniCB = [this](InsertPointTy IP) {
       OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+      return llvm::Error::success();
     };
 
     auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                     InsertPointTy CodeGenIP) {
       OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
           *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
+      return llvm::Error::success();
     };
 
     LexicalScope Scope(*this, S.getSourceRange());
     EmitStopPoint(&S);
-    Builder.restoreIP(OMPBuilder.createCritical(
-        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
-        HintInst));
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+        OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB,
+                                  S.getDirectiveName().getAsString(), HintInst);
+    assert(AfterIP && "unexpected error creating critical");
+    Builder.restoreIP(*AfterIP);
 
     return;
   }
@@ -5464,11 +5498,15 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
                                InsertPointTy CodeGenIP) {
       Builder.restoreIP(CodeGenIP);
       EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
+      return llvm::Error::success();
     };
     CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
     if (!CapturedStmtInfo)
       CapturedStmtInfo = &CapStmtInfo;
-    Builder.restoreIP(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+        OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB);
+    assert(AfterIP && "unexpected error creating taskgroup");
+    Builder.restoreIP(*AfterIP);
     return;
   }
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
@@ -6041,6 +6079,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
 
       auto FiniCB = [this](InsertPointTy IP) {
         OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+        return llvm::Error::success();
       };
 
       auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
@@ -6064,11 +6103,14 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
           OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
               *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
         }
+        return llvm::Error::success();
       };
 
       OMPLexicalScope Scope(*this, S, OMPD_unknown);
-      Builder.restoreIP(
-          OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C));
+      llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+          OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, !C);
+      assert(AfterIP && "unexpected error creating ordered");
+      Builder.restoreIP(*AfterIP);
     }
     return;
   }
@@ -7344,8 +7386,10 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
       if (IfCond)
         IfCondition = EmitScalarExpr(IfCond,
                                      /*IgnoreResultAssign=*/true);
-      return Builder.restoreIP(
-          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
+      llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion());
+      assert(AfterIP && "unexpected error creating cancel");
+      return Builder.restoreIP(*AfterIP);
     }
   }
 
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 573ced0857d5f5f..6ead45793742d6c 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -852,7 +852,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
         if (Fe.Effect.kind() == FunctionEffect::Kind::NonBlocking)
           Fn->addFnAttr(llvm::Attribute::SanitizeRealtime);
         else if (Fe.Effect.kind() == FunctionEffect::Kind::Blocking)
-          Fn->addFnAttr(llvm::Attribute::SanitizeRealtimeUnsafe);
+          Fn->addFnAttr(llvm::Attribute::SanitizeRealtimeBlocking);
       }
 
   // Apply fuzzing attribute to the function.
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 750a6cc24badca9..3ff4458fb320243 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4296,8 +4296,11 @@ class CodeGenFunction : public CodeGenTypeCache {
   LValue EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *E);
   LValue EmitOpaqueValueLValue(const OpaqueValueExpr *e);
   LValue EmitHLSLArrayAssignLValue(const BinaryOperator *E);
-  void EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args,
-                          QualType Ty);
+
+  std::pair<LValue, LValue> EmitHLSLOutArgLValues(const HLSLOutArgExpr *E,
+                                                  QualType Ty);
+  LValue EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, CallArgList &Args,
+                            QualType Ty);
 
   Address EmitExtVectorElementLValue(LValue V);
 
@@ -5147,6 +5150,9 @@ class CodeGenFunction : public CodeGenTypeCache {
                            SourceLocation ArgLoc, AbstractCallee AC,
                            unsigned ParmNum);
 
+  /// EmitWritebacks - Emit all writebacks recorded in \p Args.
+  void EmitWritebacks(const CallArgList &Args);
+
   /// EmitCallArg - Emit a single call argument.
   void EmitCallArg(CallArgList &args, const Expr *E, QualType ArgType);
 
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 2bcca5e85bdfeb9..ba376f9ecfacde7 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -7146,8 +7146,8 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) {
     // For C++ standard modules we are done - we will call the module
     // initializer for imported modules, and that will likewise call those for
     // any imports it has.
-    if (CXX20ModuleInits && Import->getImportedOwningModule() &&
-        !Import->getImportedOwningModule()->isModuleMapModule())
+    if (CXX20ModuleInits && Import->getImportedModule() &&
+        Import->getImportedModule()->isNamedModule())
       break;
 
     // For clang C++ module map modules the initializers for sub-modules are
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
index f87184fc77832ca..09191a4901f4932 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -564,6 +564,10 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
 #define AMDGPU_OPAQUE_PTR_TYPE(Name, Id, SingletonId, Width, Align, AS)        \
   case BuiltinType::Id:                                                        \
     return llvm::PointerType::get(getLLVMContext(), AS);
+#define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope)  \
+  case BuiltinType::Id:                                                        \
+    return llvm::TargetExtType::get(getLLVMContext(), "amdgcn.named.barrier",  \
+                                    {}, {Scope});
 #include "clang/Basic/AMDGPUTypes.def"
 #define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
 #include "clang/Basic/HLSLIntangibleTypes.def"
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index ec617eec67192cc..9320c6ef06efab0 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -34,10 +34,17 @@ class AArch64ABIInfo : public ABIInfo {
   AArch64ABIKind getABIKind() const { return Kind; }
   bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; }
 
-  ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadic,
-                                  unsigned CallingConvention) const;
-  ABIArgInfo coerceIllegalVector(QualType Ty) const;
+  ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadicFn) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadicFn,
+                                  bool IsNamedArg, unsigned CallingConvention,
+                                  unsigned &NSRN, unsigned &NPRN) const;
+  llvm::Type *convertFixedToScalableVectorType(const VectorType *VT) const;
+  ABIArgInfo coerceIllegalVector(QualType Ty, unsigned &NSRN,
+                                 unsigned &NPRN) const;
+  ABIArgInfo coerceAndExpandPureScalableAggregate(
+      QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
+      const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
+      unsigned &NPRN) const;
   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                          uint64_t Members) const override;
@@ -45,14 +52,26 @@ class AArch64ABIInfo : public ABIInfo {
 
   bool isIllegalVectorType(QualType Ty) const;
 
+  bool passAsPureScalableType(QualType Ty, unsigned &NV, unsigned &NP,
+                              SmallVectorImpl<llvm::Type *> &CoerceToSeq) const;
+
+  void flattenType(llvm::Type *Ty,
+                   SmallVectorImpl<llvm::Type *> &Flattened) const;
+
   void computeInfo(CGFunctionInfo &FI) const override {
     if (!::classifyReturnType(getCXXABI(), FI, *this))
       FI.getReturnInfo() =
           classifyReturnType(FI.getReturnType(), FI.isVariadic());
 
-    for (auto &it : FI.arguments())
-      it.info = classifyArgumentType(it.type, FI.isVariadic(),
-                                     FI.getCallingConvention());
+    unsigned ArgNo = 0;
+    unsigned NSRN = 0, NPRN = 0;
+    for (auto &it : FI.arguments()) {
+      const bool IsNamedArg =
+          !FI.isVariadic() || ArgNo < FI.getRequiredArgs().getNumRequiredArgs();
+      ++ArgNo;
+      it.info = classifyArgumentType(it.type, FI.isVariadic(), IsNamedArg,
+                                     FI.getCallingConvention(), NSRN, NPRN);
+    }
   }
 
   RValue EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
@@ -201,65 +220,83 @@ void WindowsAArch64TargetCodeGenInfo::setTargetAttributes(
 }
 }
 
-ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const {
-  assert(Ty->isVectorType() && "expected vector type!");
+llvm::Type *
+AArch64ABIInfo::convertFixedToScalableVectorType(const VectorType *VT) const {
+  assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
 
-  const auto *VT = Ty->castAs<VectorType>();
   if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
-    assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
     assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
                BuiltinType::UChar &&
            "unexpected builtin type for SVE predicate!");
-    return ABIArgInfo::getDirect(llvm::ScalableVectorType::get(
-        llvm::Type::getInt1Ty(getVMContext()), 16));
+    return llvm::ScalableVectorType::get(llvm::Type::getInt1Ty(getVMContext()),
+                                         16);
   }
 
   if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
-    assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
-
     const auto *BT = VT->getElementType()->castAs<BuiltinType>();
-    llvm::ScalableVectorType *ResType = nullptr;
     switch (BT->getKind()) {
     default:
       llvm_unreachable("unexpected builtin type for SVE vector!");
+
     case BuiltinType::SChar:
     case BuiltinType::UChar:
-      ResType = llvm::ScalableVectorType::get(
+      return llvm::ScalableVectorType::get(
           llvm::Type::getInt8Ty(getVMContext()), 16);
-      break;
+
     case BuiltinType::Short:
     case BuiltinType::UShort:
-      ResType = llvm::ScalableVectorType::get(
+      return llvm::ScalableVectorType::get(
           llvm::Type::getInt16Ty(getVMContext()), 8);
-      break;
+
     case BuiltinType::Int:
     case BuiltinType::UInt:
-      ResType = llvm::ScalableVectorType::get(
+      return llvm::ScalableVectorType::get(
           llvm::Type::getInt32Ty(getVMContext()), 4);
-      break;
+
     case BuiltinType::Long:
     case BuiltinType::ULong:
-      ResType = llvm::ScalableVectorType::get(
+      return llvm::ScalableVectorType::get(
           llvm::Type::getInt64Ty(getVMContext()), 2);
-      break;
+
     case BuiltinType::Half:
-      ResType = llvm::ScalableVectorType::get(
+      return llvm::ScalableVectorType::get(
           llvm::Type::getHalfTy(getVMContext()), 8);
-      break;
+
     case BuiltinType::Float:
-      ResType = llvm::ScalableVectorType::get(
+      return llvm::ScalableVectorType::get(
           llvm::Type::getFloatTy(getVMContext()), 4);
-      break;
+
     case BuiltinType::Double:
-      ResType = llvm::ScalableVectorType::get(
+      return llvm::ScalableVectorType::get(
           llvm::Type::getDoubleTy(getVMContext()), 2);
-      break;
+
     case BuiltinType::BFloat16:
-      ResType = llvm::ScalableVectorType::get(
+      return llvm::ScalableVectorType::get(
           llvm::Type::getBFloatTy(getVMContext()), 8);
-      break;
     }
-    return ABIArgInfo::getDirect(ResType);
+  }
+
+  llvm_unreachable("expected fixed-length SVE vector");
+}
+
+ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty, unsigned &NSRN,
+                                               unsigned &NPRN) const {
+  assert(Ty->isVectorType() && "expected vector type!");
+
+  const auto *VT = Ty->castAs<VectorType>();
+  if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
+    assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
+    assert(VT->getElementType()->castAs<BuiltinType>()->getKind() ==
+               BuiltinType::UChar &&
+           "unexpected builtin type for SVE predicate!");
+    NPRN = std::min(NPRN + 1, 4u);
+    return ABIArgInfo::getDirect(llvm::ScalableVectorType::get(
+        llvm::Type::getInt1Ty(getVMContext()), 16));
+  }
+
+  if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
+    NSRN = std::min(NSRN + 1, 8u);
+    return ABIArgInfo::getDirect(convertFixedToScalableVectorType(VT));
   }
 
   uint64_t Size = getContext().getTypeSize(Ty);
@@ -273,26 +310,54 @@ ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const {
     return ABIArgInfo::getDirect(ResType);
   }
   if (Size == 64) {
+    NSRN = std::min(NSRN + 1, 8u);
     auto *ResType =
         llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
     return ABIArgInfo::getDirect(ResType);
   }
   if (Size == 128) {
+    NSRN = std::min(NSRN + 1, 8u);
     auto *ResType =
         llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
     return ABIArgInfo::getDirect(ResType);
   }
+
   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
 }
 
-ABIArgInfo
-AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
-                                     unsigned CallingConvention) const {
+ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate(
+    QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
+    const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
+    unsigned &NPRN) const {
+  if (!IsNamedArg || NSRN + NVec > 8 || NPRN + NPred > 4)
+    return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
+  NSRN += NVec;
+  NPRN += NPred;
+
+  llvm::Type *UnpaddedCoerceToType =
+      UnpaddedCoerceToSeq.size() == 1
+          ? UnpaddedCoerceToSeq[0]
+          : llvm::StructType::get(CGT.getLLVMContext(), UnpaddedCoerceToSeq,
+                                  true);
+
+  SmallVector<llvm::Type *> CoerceToSeq;
+  flattenType(CGT.ConvertType(Ty), CoerceToSeq);
+  auto *CoerceToType =
+      llvm::StructType::get(CGT.getLLVMContext(), CoerceToSeq, false);
+
+  return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
+}
+
+ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
+                                                bool IsNamedArg,
+                                                unsigned CallingConvention,
+                                                unsigned &NSRN,
+                                                unsigned &NPRN) const {
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
   // Handle illegal vector types here.
   if (isIllegalVectorType(Ty))
-    return coerceIllegalVector(Ty);
+    return coerceIllegalVector(Ty, NSRN, NPRN);
 
   if (!isAggregateTypeForABI(Ty)) {
     // Treat an enum type as its underlying type.
@@ -303,6 +368,36 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
       if (EIT->getNumBits() > 128)
         return getNaturalAlignIndirect(Ty, false);
 
+    if (Ty->isVectorType())
+      NSRN = std::min(NSRN + 1, 8u);
+    else if (const auto *BT = Ty->getAs<BuiltinType>()) {
+      if (BT->isFloatingPoint())
+        NSRN = std::min(NSRN + 1, 8u);
+      else {
+        switch (BT->getKind()) {
+        case BuiltinType::MFloat8x8:
+        case BuiltinType::MFloat8x16:
+          NSRN = std::min(NSRN + 1, 8u);
+          break;
+        case BuiltinType::SveBool:
+        case BuiltinType::SveCount:
+          NPRN = std::min(NPRN + 1, 4u);
+          break;
+        case BuiltinType::SveBoolx2:
+          NPRN = std::min(NPRN + 2, 4u);
+          break;
+        case BuiltinType::SveBoolx4:
+          NPRN = std::min(NPRN + 4, 4u);
+          break;
+        default:
+          if (BT->isSVESizelessBuiltinType())
+            NSRN = std::min(
+                NSRN + getContext().getBuiltinVectorTypeInfo(BT).NumVectors,
+                8u);
+        }
+      }
+    }
+
     return (isPromotableIntegerTypeForABI(Ty) && isDarwinPCS()
                 ? ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty))
                 : ABIArgInfo::getDirect());
@@ -335,10 +430,11 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
   uint64_t Members = 0;
   bool IsWin64 = Kind == AArch64ABIKind::Win64 ||
                  CallingConvention == llvm::CallingConv::Win64;
-  bool IsWinVariadic = IsWin64 && IsVariadic;
+  bool IsWinVariadic = IsWin64 && IsVariadicFn;
   // In variadic functions on Windows, all composite types are treated alike,
   // no special handling of HFAs/HVAs.
   if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
+    NSRN = std::min(NSRN + Members, uint64_t(8));
     if (Kind != AArch64ABIKind::AAPCS)
       return ABIArgInfo::getDirect(
           llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
@@ -353,13 +449,19 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
         nullptr, true, Align);
   }
 
+  // In AAPCS named arguments of a Pure Scalable Type are passed expanded in
+  // registers, or indirectly if there are not enough registers.
+  if (Kind == AArch64ABIKind::AAPCS) {
+    unsigned NVec = 0, NPred = 0;
+    SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
+    if (passAsPureScalableType(Ty, NVec, NPred, UnpaddedCoerceToSeq) &&
+        (NVec + NPred) > 0)
+      return coerceAndExpandPureScalableAggregate(
+          Ty, IsNamedArg, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN);
+  }
+
   // Aggregates <= 16 bytes are passed directly in registers or on the stack.
   if (Size <= 128) {
-    // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of
-    // same size and alignment.
-    if (getTarget().isRenderScriptTarget()) {
-      return coerceToIntArray(Ty, getContext(), getVMContext());
-    }
     unsigned Alignment;
     if (Kind == AArch64ABIKind::AAPCS) {
       Alignment = getContext().getTypeUnadjustedAlign(Ty);
@@ -383,14 +485,16 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
 }
 
 ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
-                                              bool IsVariadic) const {
+                                              bool IsVariadicFn) const {
   if (RetTy->isVoidType())
     return ABIArgInfo::getIgnore();
 
   if (const auto *VT = RetTy->getAs<VectorType>()) {
     if (VT->getVectorKind() == VectorKind::SveFixedLengthData ||
-        VT->getVectorKind() == VectorKind::SveFixedLengthPredicate)
-      return coerceIllegalVector(RetTy);
+        VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
+      unsigned NSRN = 0, NPRN = 0;
+      return coerceIllegalVector(RetTy, NSRN, NPRN);
+    }
   }
 
   // Large vector types should be returned via memory.
@@ -419,18 +523,26 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
   uint64_t Members = 0;
   if (isHomogeneousAggregate(RetTy, Base, Members) &&
       !(getTarget().getTriple().getArch() == llvm::Triple::aarch64_32 &&
-        IsVariadic))
+        IsVariadicFn))
     // Homogeneous Floating-point Aggregates (HFAs) are returned directly.
     return ABIArgInfo::getDirect();
 
+  // In AAPCS return values of a Pure Scalable type are treated as a single
+  // named argument and passed expanded in registers, or indirectly if there are
+  // not enough registers.
+  if (Kind == AArch64ABIKind::AAPCS) {
+    unsigned NSRN = 0, NPRN = 0;
+    unsigned NVec = 0, NPred = 0;
+    SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
+    if (passAsPureScalableType(RetTy, NVec, NPred, UnpaddedCoerceToSeq) &&
+        (NVec + NPred) > 0)
+      return coerceAndExpandPureScalableAggregate(
+          RetTy, /* IsNamedArg */ true, NVec, NPred, UnpaddedCoerceToSeq, NSRN,
+          NPRN);
+  }
+
   // Aggregates <= 16 bytes are returned directly in registers or on the stack.
   if (Size <= 128) {
-    // On RenderScript, coerce Aggregates <= 16 bytes to an integer array of
-    // same size and alignment.
-    if (getTarget().isRenderScriptTarget()) {
-      return coerceToIntArray(RetTy, getContext(), getVMContext());
-    }
-
     if (Size <= 64 && getDataLayout().isLittleEndian()) {
       // Composite types are returned in lower bits of a 64-bit register for LE,
       // and in higher bits for BE. However, integer types are always returned
@@ -508,9 +620,15 @@ bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
   // but with the difference that any floating-point type is allowed,
   // including __fp16.
   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-    if (BT->isFloatingPoint())
+    if (BT->isFloatingPoint() || BT->getKind() == BuiltinType::MFloat8x16 ||
+        BT->getKind() == BuiltinType::MFloat8x8)
       return true;
   } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    if (auto Kind = VT->getVectorKind();
+        Kind == VectorKind::SveFixedLengthData ||
+        Kind == VectorKind::SveFixedLengthPredicate)
+      return false;
+
     unsigned VecSize = getContext().getTypeSize(VT);
     if (VecSize == 64 || VecSize == 128)
       return true;
@@ -533,11 +651,166 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
   return true;
 }
 
+// Check if a type needs to be passed in registers as a Pure Scalable Type (as
+// defined by AAPCS64). Return the number of data vectors and the number of
+// predicate vectors in the type, into `NVec` and `NPred`, respectively. Upon
+// return `CoerceToSeq` contains an expanded sequence of LLVM IR types, one
+// element for each non-composite member. For practical purposes, limit the
+// length of `CoerceToSeq` to 12 (the maximum that could possibly fit in
+// registers) and return false if it would be exceeded, the effect of which
+// is to pass the argument under the rules for a large (> 16 bytes) composite.
+bool AArch64ABIInfo::passAsPureScalableType(
+    QualType Ty, unsigned &NVec, unsigned &NPred,
+    SmallVectorImpl<llvm::Type *> &CoerceToSeq) const {
+  if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
+    uint64_t NElt = AT->getZExtSize();
+    if (NElt == 0)
+      return false;
+
+    unsigned NV = 0, NP = 0;
+    SmallVector<llvm::Type *> EltCoerceToSeq;
+    if (!passAsPureScalableType(AT->getElementType(), NV, NP, EltCoerceToSeq))
+      return false;
+
+    if (CoerceToSeq.size() + NElt * EltCoerceToSeq.size() > 12)
+      return false;
+
+    for (uint64_t I = 0; I < NElt; ++I)
+      llvm::copy(EltCoerceToSeq, std::back_inserter(CoerceToSeq));
+
+    NVec += NElt * NV;
+    NPred += NElt * NP;
+    return true;
+  }
+
+  if (const RecordType *RT = Ty->getAs<RecordType>()) {
+    // If the record cannot be passed in registers, then it's not a PST.
+    if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(RT, getCXXABI());
+        RAA != CGCXXABI::RAA_Default)
+      return false;
+
+    // Pure scalable types are never unions and never contain unions.
+    const RecordDecl *RD = RT->getDecl();
+    if (RD->isUnion())
+      return false;
+
+    // If this is a C++ record, check the bases.
+    if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
+      for (const auto &I : CXXRD->bases()) {
+        if (isEmptyRecord(getContext(), I.getType(), true))
+          continue;
+        if (!passAsPureScalableType(I.getType(), NVec, NPred, CoerceToSeq))
+          return false;
+      }
+    }
+
+    // Check members.
+    for (const auto *FD : RD->fields()) {
+      QualType FT = FD->getType();
+      if (isEmptyField(getContext(), FD, /* AllowArrays */ true))
+        continue;
+      if (!passAsPureScalableType(FT, NVec, NPred, CoerceToSeq))
+        return false;
+    }
+
+    return true;
+  }
+
+  const auto *VT = Ty->getAs<VectorType>();
+  if (!VT)
+    return false;
+
+  if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
+    ++NPred;
+    if (CoerceToSeq.size() + 1 > 12)
+      return false;
+    CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
+    return true;
+  }
+
+  if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
+    ++NVec;
+    if (CoerceToSeq.size() + 1 > 12)
+      return false;
+    CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
+    return true;
+  }
+
+  if (!VT->isBuiltinType())
+    return false;
+
+  switch (cast<BuiltinType>(VT)->getKind()) {
+#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId)                    \
+  case BuiltinType::Id:                                                        \
+    ++NVec;                                                                    \
+    break;
+#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId)                 \
+  case BuiltinType::Id:                                                        \
+    ++NPred;                                                                   \
+    break;
+#define SVE_TYPE(Name, Id, SingletonId)
+#include "clang/Basic/AArch64SVEACLETypes.def"
+  default:
+    return false;
+  }
+
+  ASTContext::BuiltinVectorTypeInfo Info =
+      getContext().getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
+  assert(Info.NumVectors > 0 && Info.NumVectors <= 4 &&
+         "Expected 1, 2, 3 or 4 vectors!");
+  auto VTy = llvm::ScalableVectorType::get(CGT.ConvertType(Info.ElementType),
+                                           Info.EC.getKnownMinValue());
+
+  if (CoerceToSeq.size() + Info.NumVectors > 12)
+    return false;
+  std::fill_n(std::back_inserter(CoerceToSeq), Info.NumVectors, VTy);
+
+  return true;
+}
+
+// Expand an LLVM IR type into a sequence with an element for each non-struct,
+// non-array member of the type, with the exception of the padding types, which
+// are retained.
+void AArch64ABIInfo::flattenType(
+    llvm::Type *Ty, SmallVectorImpl<llvm::Type *> &Flattened) const {
+
+  if (ABIArgInfo::isPaddingForCoerceAndExpand(Ty)) {
+    Flattened.push_back(Ty);
+    return;
+  }
+
+  if (const auto *AT = dyn_cast<llvm::ArrayType>(Ty)) {
+    uint64_t NElt = AT->getNumElements();
+    if (NElt == 0)
+      return;
+
+    SmallVector<llvm::Type *> EltFlattened;
+    flattenType(AT->getElementType(), EltFlattened);
+
+    for (uint64_t I = 0; I < NElt; ++I)
+      llvm::copy(EltFlattened, std::back_inserter(Flattened));
+    return;
+  }
+
+  if (const auto *ST = dyn_cast<llvm::StructType>(Ty)) {
+    for (auto *ET : ST->elements())
+      flattenType(ET, Flattened);
+    return;
+  }
+
+  Flattened.push_back(Ty);
+}
+
 RValue AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
                                       CodeGenFunction &CGF, AArch64ABIKind Kind,
                                       AggValueSlot Slot) const {
-  ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
-                                       CGF.CurFnInfo->getCallingConvention());
+  // These numbers are not used for variadic arguments, hence it doesn't matter
+  // that they don't retain their values across multiple calls to
+  // `classifyArgumentType` here.
+  unsigned NSRN = 0, NPRN = 0;
+  ABIArgInfo AI =
+      classifyArgumentType(Ty, /*IsVariadicFn=*/true, /* IsNamedArg */ false,
+                           CGF.CurFnInfo->getCallingConvention(), NSRN, NPRN);
   // Empty records are ignored for parameter passing purposes.
   if (AI.isIgnore())
     return Slot.asRValue();
diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp
index 49ac1a76e767aa0..2d858fa2f3c3a35 100644
--- a/clang/lib/CodeGen/Targets/ARM.cpp
+++ b/clang/lib/CodeGen/Targets/ARM.cpp
@@ -420,12 +420,6 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
                                    /*Realign=*/TyAlign > ABIAlign);
   }
 
-  // On RenderScript, coerce Aggregates <= 64 bytes to an integer array of
-  // same size and alignment.
-  if (getTarget().isRenderScriptTarget()) {
-    return coerceToIntArray(Ty, getContext(), getVMContext());
-  }
-
   // Otherwise, pass by coercing to a structure of the appropriate size.
   llvm::Type* ElemTy;
   unsigned SizeRegs;
@@ -609,11 +603,6 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy, bool isVariadic,
   // are returned indirectly.
   uint64_t Size = getContext().getTypeSize(RetTy);
   if (Size <= 32) {
-    // On RenderScript, coerce Aggregates <= 4 bytes to an integer array of
-    // same size and alignment.
-    if (getTarget().isRenderScriptTarget()) {
-      return coerceToIntArray(RetTy, getContext(), getVMContext());
-    }
     if (getDataLayout().isBigEndian())
       // Return in 32 bit integer integer type (as if loaded by LDR, AAPCS 5.4)
       return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
diff --git a/clang/lib/CodeGen/Targets/RISCV.cpp b/clang/lib/CodeGen/Targets/RISCV.cpp
index fd72fe673b9b14d..b04e436c665f523 100644
--- a/clang/lib/CodeGen/Targets/RISCV.cpp
+++ b/clang/lib/CodeGen/Targets/RISCV.cpp
@@ -594,6 +594,11 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
     const auto *FD = dyn_cast_or_null<FunctionDecl>(D);
     if (!FD) return;
 
+    auto *Fn = cast<llvm::Function>(GV);
+
+    if (CGM.getCodeGenOpts().CFProtectionReturn)
+      Fn->addFnAttr("hw-shadow-stack");
+
     const auto *Attr = FD->getAttr<RISCVInterruptAttr>();
     if (!Attr)
       return;
@@ -604,8 +609,6 @@ class RISCVTargetCodeGenInfo : public TargetCodeGenInfo {
     case RISCVInterruptAttr::machine: Kind = "machine"; break;
     }
 
-    auto *Fn = cast<llvm::Function>(GV);
-
     Fn->addFnAttr("interrupt", Kind);
   }
 };
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 4df317709508587..34de0043ca012aa 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -897,7 +897,9 @@ bool ToolChain::needsProfileRT(const ArgList &Args) {
          Args.hasArg(options::OPT_fprofile_instr_generate) ||
          Args.hasArg(options::OPT_fprofile_instr_generate_EQ) ||
          Args.hasArg(options::OPT_fcreate_profile) ||
-         Args.hasArg(options::OPT_forder_file_instrumentation);
+         Args.hasArg(options::OPT_forder_file_instrumentation) ||
+         Args.hasArg(options::OPT_fprofile_generate_cold_function_coverage) ||
+         Args.hasArg(options::OPT_fprofile_generate_cold_function_coverage_EQ);
 }
 
 bool ToolChain::needsGCovInstrumentation(const llvm::opt::ArgList &Args) {
diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 2c85d21ebd738c7..a8061ffd9321f58 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -648,6 +648,17 @@ void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
         Args.MakeArgString("-plugin-opt=-mattr=" + llvm::join(Features, ",")));
   }
 
+  if (Args.hasArg(options::OPT_stdlib))
+    CmdArgs.append({"-lc", "-lm"});
+  if (Args.hasArg(options::OPT_startfiles)) {
+    std::optional<std::string> IncludePath = getToolChain().getStdlibPath();
+    if (!IncludePath)
+      IncludePath = "/lib";
+    SmallString<128> P(*IncludePath);
+    llvm::sys::path::append(P, "crt1.o");
+    CmdArgs.push_back(Args.MakeArgString(P));
+  }
+
   CmdArgs.push_back("-o");
   CmdArgs.push_back(Output.getFilename());
   C.addCommand(std::make_unique<Command>(
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 04b3832327a99c4..4c6f508f1f24a62 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -632,6 +632,26 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C,
     }
   }
 
+  if (auto *ColdFuncCoverageArg = Args.getLastArg(
+          options::OPT_fprofile_generate_cold_function_coverage,
+          options::OPT_fprofile_generate_cold_function_coverage_EQ)) {
+    SmallString<128> Path(
+        ColdFuncCoverageArg->getOption().matches(
+            options::OPT_fprofile_generate_cold_function_coverage_EQ)
+            ? ColdFuncCoverageArg->getValue()
+            : "");
+    llvm::sys::path::append(Path, "default_%m.profraw");
+    // FIXME: Ideally the file path should be passed through
+    // `-fprofile-instrument-path=` (InstrProfileOutput); however, this field
+    // is shared with other profile use paths (see PGOOptions), so we need to
+    // refactor PGOOptions to make it work.
+    CmdArgs.push_back("-mllvm");
+    CmdArgs.push_back(Args.MakeArgString(
+        Twine("--instrument-cold-function-only-path=") + Path));
+    CmdArgs.push_back("-mllvm");
+    CmdArgs.push_back("--pgo-function-entry-coverage");
+  }
+
   Arg *PGOGenArg = nullptr;
   if (PGOGenerateArg) {
     assert(!CSPGOGenerateArg);
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 412b379304b1e6f..ddd5ea248ca0cca 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -643,6 +643,17 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   llvm::sys::path::append(DefaultLibPath, CLANG_INSTALL_LIBDIR_BASENAME);
   CmdArgs.push_back(Args.MakeArgString(Twine("-L") + DefaultLibPath));
 
+  if (Args.hasArg(options::OPT_stdlib))
+    CmdArgs.append({"-lc", "-lm"});
+  if (Args.hasArg(options::OPT_startfiles)) {
+    std::optional<std::string> IncludePath = getToolChain().getStdlibPath();
+    if (!IncludePath)
+      IncludePath = "/lib";
+    SmallString<128> P(*IncludePath);
+    llvm::sys::path::append(P, "crt1.o");
+    CmdArgs.push_back(Args.MakeArgString(P));
+  }
+
   C.addCommand(std::make_unique<Command>(
       JA, *this,
       ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index 366cadc2e547752..f9d2fdffe3b2fc0 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -148,6 +148,7 @@ void Flang::addCodegenOptions(const ArgList &Args,
 
   Args.addAllArgs(CmdArgs, {options::OPT_flang_experimental_hlfir,
                             options::OPT_flang_deprecated_no_hlfir,
+                            options::OPT_flang_experimental_integer_overflow,
                             options::OPT_fno_ppc_native_vec_elem_order,
                             options::OPT_fppc_native_vec_elem_order});
 }
@@ -202,6 +203,32 @@ void Flang::AddAArch64TargetArgs(const ArgList &Args,
   }
 }
 
+void Flang::AddPPCTargetArgs(const ArgList &Args,
+                             ArgStringList &CmdArgs) const {
+  const Driver &D = getToolChain().getDriver();
+  bool VecExtabi = false;
+
+  if (const Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
+    StringRef V = A->getValue();
+    if (V == "vec-extabi")
+      VecExtabi = true;
+    else if (V == "vec-default")
+      VecExtabi = false;
+    else
+      D.Diag(diag::err_drv_unsupported_option_argument)
+          << A->getSpelling() << V;
+  }
+
+  const llvm::Triple &T = getToolChain().getTriple();
+  if (VecExtabi) {
+    if (!T.isOSAIX()) {
+      D.Diag(diag::err_drv_unsupported_opt_for_target)
+          << "-mabi=vec-extabi" << T.str();
+    }
+    CmdArgs.push_back("-mabi=vec-extabi");
+  }
+}
+
 void Flang::AddRISCVTargetArgs(const ArgList &Args,
                                ArgStringList &CmdArgs) const {
   const llvm::Triple &Triple = getToolChain().getTriple();
@@ -382,6 +409,11 @@ void Flang::addTargetOptions(const ArgList &Args,
     getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false);
     AddX86_64TargetArgs(Args, CmdArgs);
     break;
+  case llvm::Triple::ppc:
+  case llvm::Triple::ppc64:
+  case llvm::Triple::ppc64le:
+    AddPPCTargetArgs(Args, CmdArgs);
+    break;
   }
 
   if (Arg *A = Args.getLastArg(options::OPT_fveclib)) {
diff --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h
index 9f5e26b8608324d..4d7d0b8cd9ea55c 100644
--- a/clang/lib/Driver/ToolChains/Flang.h
+++ b/clang/lib/Driver/ToolChains/Flang.h
@@ -84,6 +84,13 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool {
   void AddX86_64TargetArgs(const llvm::opt::ArgList &Args,
                            llvm::opt::ArgStringList &CmdArgs) const;
 
+  /// Add specific options for PPC target.
+  ///
+  /// \param [in] Args The list of input driver arguments
+  /// \param [out] CmdArgs The list of output command arguments
+  void AddPPCTargetArgs(const llvm::opt::ArgList &Args,
+                        llvm::opt::ArgStringList &CmdArgs) const;
+
   /// Extract offload options from the driver arguments and add them to
   /// the command arguments.
   /// \param [in] C The current compilation for the driver invocation
diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp
index a50333223ff5c41..9daafbe703f68eb 100644
--- a/clang/lib/Driver/ToolChains/PS4CPU.cpp
+++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp
@@ -250,6 +250,20 @@ void tools::PS5cpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-pie");
 
   if (!Relocatable) {
+    CmdArgs.push_back("--eh-frame-hdr");
+    CmdArgs.push_back("--hash-style=sysv");
+
+    // Add a build-id by default to allow the PlayStation symbol server to
+    // index the symbols. `uuid` is the cheapest fool-proof method.
+    // (The non-determinism and alternative methods are noted in the downstream
+    // PlayStation docs).
+    CmdArgs.push_back("--build-id=uuid");
+
+    // All references are expected to be resolved at static link time for both
+    // executables and dynamic libraries. This has been the default linking
+    // behaviour for numerous PlayStation generations.
+    CmdArgs.push_back("--unresolved-symbols=report-all");
+
     // Lazy binding of PLTs is not supported on PlayStation. They are placed in
     // the RelRo segment.
     CmdArgs.push_back("-z");
@@ -416,8 +430,7 @@ toolchains::PS4PS5Base::PS4PS5Base(const Driver &D, const llvm::Triple &Triple,
 }
 
 void toolchains::PS4PS5Base::AddClangSystemIncludeArgs(
-    const ArgList &DriverArgs,
-    ArgStringList &CC1Args) const {
+    const ArgList &DriverArgs, ArgStringList &CC1Args) const {
   const Driver &D = getDriver();
 
   if (DriverArgs.hasArg(options::OPT_nostdinc))
diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp
index 3de45b00b4d00fd..b14aac0f0ce015e 100644
--- a/clang/lib/Driver/Types.cpp
+++ b/clang/lib/Driver/Types.cpp
@@ -201,7 +201,6 @@ bool types::isDerivedFromC(ID Id) {
   case TY_PP_ObjCXX:
   case TY_PP_ObjCXX_Alias:
   case TY_ObjCXX:
-  case TY_RenderScript:
   case TY_PP_CHeader:
   case TY_CHeader:
   case TY_CLHeader:
@@ -328,7 +327,6 @@ types::ID types::lookupTypeForExtension(llvm::StringRef Ext) {
       .Case("ll", TY_LLVM_IR)
       .Case("mi", TY_PP_ObjC)
       .Case("mm", TY_ObjCXX)
-      .Case("rs", TY_RenderScript)
       .Case("adb", TY_Ada)
       .Case("ads", TY_Ada)
       .Case("asm", TY_PP_Asm)
diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
index 030509d37875950..c730c062b6a1d56 100644
--- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
+++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp
@@ -213,7 +213,6 @@ StringRef getLanguageName(Language Lang) {
   case Language::OpenCL:
   case Language::OpenCLCXX:
   case Language::CUDA:
-  case Language::RenderScript:
   case Language::HIP:
   case Language::HLSL:
 
diff --git a/clang/lib/Frontend/ASTUnit.cpp b/clang/lib/Frontend/ASTUnit.cpp
index bffff0d27af3ab6..4aec928f9eb0a5b 100644
--- a/clang/lib/Frontend/ASTUnit.cpp
+++ b/clang/lib/Frontend/ASTUnit.cpp
@@ -2699,8 +2699,6 @@ InputKind ASTUnit::getInputKind() const {
     Lang = Language::OpenCL;
   else if (LangOpts.CUDA)
     Lang = Language::CUDA;
-  else if (LangOpts.RenderScript)
-    Lang = Language::RenderScript;
   else if (LangOpts.CPlusPlus)
     Lang = LangOpts.ObjC ? Language::ObjCXX : Language::CXX;
   else
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index db7c791059a32ea..d8261e12b08b5c8 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2846,9 +2846,6 @@ static void GenerateFrontendArgs(const FrontendOptions &Opts,
     case Language::ObjCXX:
       Lang = "objective-c++";
       break;
-    case Language::RenderScript:
-      Lang = "renderscript";
-      break;
     case Language::Asm:
       Lang = "assembler-with-cpp";
       break;
@@ -3071,7 +3068,6 @@ static bool ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
                 .Case("c++", Language::CXX)
                 .Case("objective-c", Language::ObjC)
                 .Case("objective-c++", Language::ObjCXX)
-                .Case("renderscript", Language::RenderScript)
                 .Case("hlsl", Language::HLSL)
                 .Default(Language::Unknown);
 
@@ -3499,7 +3495,6 @@ static bool IsInputCompatibleWithStandard(InputKind IK,
 
   case Language::C:
   case Language::ObjC:
-  case Language::RenderScript:
     return S.getLanguage() == Language::C;
 
   case Language::OpenCL:
@@ -3551,8 +3546,6 @@ static StringRef GetInputKindName(InputKind IK) {
     return "C++ for OpenCL";
   case Language::CUDA:
     return "CUDA";
-  case Language::RenderScript:
-    return "RenderScript";
   case Language::HIP:
     return "HIP";
 
diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp
index 81eea9c4c4dc58e..9a50e7453eb61a5 100644
--- a/clang/lib/Frontend/FrontendAction.cpp
+++ b/clang/lib/Frontend/FrontendAction.cpp
@@ -358,7 +358,7 @@ static std::error_code collectModuleHeaderIncludes(
 
   // Add includes for each of these headers.
   for (auto HK : {Module::HK_Normal, Module::HK_Private}) {
-    for (Module::Header &H : Module->Headers[HK]) {
+    for (const Module::Header &H : Module->getHeaders(HK)) {
       Module->addTopHeader(H.Entry);
       // Use the path as specified in the module map file. We'll look for this
       // file relative to the module build directory (the directory containing
@@ -534,7 +534,6 @@ static Module *prepareToBuildModule(CompilerInstance &CI,
     }
     if (*OriginalModuleMap != CI.getSourceManager().getFileEntryRefForID(
                                   CI.getSourceManager().getMainFileID())) {
-      M->IsInferred = true;
       auto FileCharacter =
           M->IsSystem ? SrcMgr::C_System_ModuleMap : SrcMgr::C_User_ModuleMap;
       FileID OriginalModuleMapFID = CI.getSourceManager().getOrCreateFileID(
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 64f90c493c1055d..e943f143d4c1588 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -1108,7 +1108,6 @@ void PrintPreambleAction::ExecuteAction() {
   case Language::Unknown:
   case Language::Asm:
   case Language::LLVM_IR:
-  case Language::RenderScript:
     // We can't do anything with these.
     return;
   }
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index e97953d87a2ff97..0211d1870b30a07 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -243,6 +243,7 @@ set(x86_files
   shaintrin.h
   sm3intrin.h
   sm4intrin.h
+  sm4evexintrin.h
   smmintrin.h
   tbmintrin.h
   tmmintrin.h
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index d6494762169b25b..4f00b7f1a8d9d4e 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2110,8 +2110,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a,
 ///    A 128-bit vector of [4 x i32].
 /// \returns A 128-bit vector of [4 x i32] containing the sums of both
 ///    parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_add_epi32(__m128i __a, __m128i __b) {
   return (__m128i)((__v4su)__a + (__v4su)__b);
 }
 
@@ -2147,8 +2147,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) {
 ///    A 128-bit vector of [2 x i64].
 /// \returns A 128-bit vector of [2 x i64] containing the sums of both
 ///    parameters.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_add_epi64(__m128i __a, __m128i __b) {
   return (__m128i)((__v2du)__a + (__v2du)__b);
 }
 
@@ -2539,8 +2539,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a,
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the differences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sub_epi32(__m128i __a, __m128i __b) {
   return (__m128i)((__v4su)__a - (__v4su)__b);
 }
 
@@ -2573,8 +2573,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) {
 ///    A 128-bit integer vector containing the subtrahends.
 /// \returns A 128-bit integer vector containing the differences of the values
 ///    in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a,
-                                                           __m128i __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_sub_epi64(__m128i __a, __m128i __b) {
   return (__m128i)((__v2du)__a - (__v2du)__b);
 }
 
@@ -3512,8 +3512,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) {
 ///    destination vector of [2 x i64].
 /// \returns An initialized 128-bit vector of [2 x i64] containing the values
 ///    provided in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1,
-                                                            long long __q0) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_set_epi64x(long long __q1, long long __q0) {
   return __extension__(__m128i)(__v2di){__q0, __q1};
 }
 
@@ -3533,9 +3533,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1,
 ///    destination vector of [2 x i64].
 /// \returns An initialized 128-bit vector of [2 x i64] containing the values
 ///    provided in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1,
-                                                           __m64 __q0) {
-  return _mm_set_epi64x((long long)__q1, (long long)__q0);
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_set_epi64(__m64 __q1, __m64 __q0) {
+  return _mm_set_epi64x((long long)__q1[0], (long long)__q0[0]);
 }
 
 /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with
@@ -3560,8 +3560,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1,
 ///    vector.
 /// \returns An initialized 128-bit vector of [4 x i32] containing the values
 ///    provided in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2,
-                                                           int __i1, int __i0) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_epi32(int __i3,
+                                                                     int __i2,
+                                                                     int __i1,
+                                                                     int __i0) {
   return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3};
 }
 
@@ -3599,7 +3601,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2,
 ///    vector.
 /// \returns An initialized 128-bit vector of [8 x i16] containing the values
 ///    provided in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3,
               short __w2, short __w1, short __w0) {
   return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3,
@@ -3648,7 +3650,7 @@ _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3,
 ///    Initializes bits [7:0] of the destination vector.
 /// \returns An initialized 128-bit vector of [16 x i8] containing the values
 ///    provided in the operands.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11,
              char __b10, char __b9, char __b8, char __b7, char __b6, char __b5,
              char __b4, char __b3, char __b2, char __b1, char __b0) {
@@ -3670,7 +3672,8 @@ _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11,
 ///    vector.
 /// \returns An initialized 128-bit integer vector of [2 x i64] with both
 ///    elements containing the value provided in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_set1_epi64x(long long __q) {
   return _mm_set_epi64x(__q, __q);
 }
 
@@ -3687,7 +3690,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) {
 ///    vector.
 /// \returns An initialized 128-bit vector of [2 x i64] with all elements
 ///    containing the value provided in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_set1_epi64(__m64 __q) {
   return _mm_set_epi64(__q, __q);
 }
 
@@ -3704,7 +3708,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) {
 ///    vector.
 /// \returns An initialized 128-bit vector of [4 x i32] with all elements
 ///    containing the value provided in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i) {
   return _mm_set_epi32(__i, __i, __i, __i);
 }
 
@@ -3721,7 +3725,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) {
 ///    vector.
 /// \returns An initialized 128-bit vector of [8 x i16] with all elements
 ///    containing the value provided in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_set1_epi16(short __w) {
   return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);
 }
 
@@ -3738,7 +3743,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) {
 ///    vector.
 /// \returns An initialized 128-bit vector of [16 x i8] with all elements
 ///    containing the value provided in the operand.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b) {
   return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b,
                       __b, __b, __b, __b, __b);
 }
@@ -3757,8 +3762,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) {
 ///    A 64-bit integral value used to initialize the upper 64 bits of the
 ///    result.
 /// \returns An initialized 128-bit integer vector.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0,
-                                                            __m64 __q1) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_setr_epi64(__m64 __q0, __m64 __q1) {
   return _mm_set_epi64(__q1, __q0);
 }
 
@@ -3779,9 +3784,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0,
 /// \param __i3
 ///    A 32-bit integral value used to initialize bits [127:96] of the result.
 /// \returns An initialized 128-bit integer vector.
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1,
-                                                            int __i2,
-                                                            int __i3) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) {
   return _mm_set_epi32(__i3, __i2, __i1, __i0);
 }
 
@@ -3810,7 +3814,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1,
 /// \param __w7
 ///    A 16-bit integral value used to initialize bits [127:112] of the result.
 /// \returns An initialized 128-bit integer vector.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4,
                short __w5, short __w6, short __w7) {
   return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);
@@ -3857,7 +3861,7 @@ _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4,
 /// \param __b15
 ///    An 8-bit integral value used to initialize bits [127:120] of the result.
 /// \returns An initialized 128-bit integer vector.
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
               char __b6, char __b7, char __b8, char __b9, char __b10,
               char __b11, char __b12, char __b13, char __b14, char __b15) {
diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
index 30dce60b3ff7029..d9f3a17ea23d8e7 100644
--- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h
@@ -438,6 +438,24 @@ template <typename T> constexpr uint asuint(T F) {
   return __detail::bit_cast<uint, T>(F);
 }
 
+//===----------------------------------------------------------------------===//
+// asuint splitdouble builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn void asuint(double D, out uint lowbits, out uint highbits)
+/// \brief Splits double D and interprets its low bits and high bits as uints.
+/// \param D The input double.
+/// \param lowbits The output lowbits of D.
+/// \param highbits The output highbits of D.
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble)
+void asuint(double, out uint, out uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble)
+void asuint(double2, out uint2, out uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble)
+void asuint(double3, out uint3, out uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_splitdouble)
+void asuint(double4, out uint4, out uint4);
+
 //===----------------------------------------------------------------------===//
 // atan builtins
 //===----------------------------------------------------------------------===//
@@ -705,66 +723,88 @@ float4 cosh(float4);
 
 #ifdef __HLSL_ENABLE_16_BIT
 _HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-int16_t countbits(int16_t);
+const inline uint countbits(int16_t x) {
+  return __builtin_elementwise_popcount(x);
+}
 _HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-int16_t2 countbits(int16_t2);
+const inline uint2 countbits(int16_t2 x) {
+  return __builtin_elementwise_popcount(x);
+}
 _HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-int16_t3 countbits(int16_t3);
+const inline uint3 countbits(int16_t3 x) {
+  return __builtin_elementwise_popcount(x);
+}
 _HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-int16_t4 countbits(int16_t4);
+const inline uint4 countbits(int16_t4 x) {
+  return __builtin_elementwise_popcount(x);
+}
 _HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-uint16_t countbits(uint16_t);
+const inline uint countbits(uint16_t x) {
+  return __builtin_elementwise_popcount(x);
+}
 _HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-uint16_t2 countbits(uint16_t2);
+const inline uint2 countbits(uint16_t2 x) {
+  return __builtin_elementwise_popcount(x);
+}
 _HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-uint16_t3 countbits(uint16_t3);
+const inline uint3 countbits(uint16_t3 x) {
+  return __builtin_elementwise_popcount(x);
+}
 _HLSL_AVAILABILITY(shadermodel, 6.2)
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-uint16_t4 countbits(uint16_t4);
+const inline uint4 countbits(uint16_t4 x) {
+  return __builtin_elementwise_popcount(x);
+}
 #endif
 
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-int countbits(int);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-int2 countbits(int2);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-int3 countbits(int3);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-int4 countbits(int4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-uint countbits(uint);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-uint2 countbits(uint2);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-uint3 countbits(uint3);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-uint4 countbits(uint4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-int64_t countbits(int64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-int64_t2 countbits(int64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-int64_t3 countbits(int64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-int64_t4 countbits(int64_t4);
-
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-uint64_t countbits(uint64_t);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-uint64_t2 countbits(uint64_t2);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-uint64_t3 countbits(uint64_t3);
-_HLSL_BUILTIN_ALIAS(__builtin_elementwise_popcount)
-uint64_t4 countbits(uint64_t4);
+const inline uint countbits(int x) { return __builtin_elementwise_popcount(x); }
+const inline uint2 countbits(int2 x) {
+  return __builtin_elementwise_popcount(x);
+}
+const inline uint3 countbits(int3 x) {
+  return __builtin_elementwise_popcount(x);
+}
+const inline uint4 countbits(int4 x) {
+  return __builtin_elementwise_popcount(x);
+}
+
+const inline uint countbits(uint x) {
+  return __builtin_elementwise_popcount(x);
+}
+const inline uint2 countbits(uint2 x) {
+  return __builtin_elementwise_popcount(x);
+}
+const inline uint3 countbits(uint3 x) {
+  return __builtin_elementwise_popcount(x);
+}
+const inline uint4 countbits(uint4 x) {
+  return __builtin_elementwise_popcount(x);
+}
+
+const inline uint countbits(int64_t x) {
+  return __builtin_elementwise_popcount(x);
+}
+const inline uint2 countbits(int64_t2 x) {
+  return __builtin_elementwise_popcount(x);
+}
+const inline uint3 countbits(int64_t3 x) {
+  return __builtin_elementwise_popcount(x);
+}
+const inline uint4 countbits(int64_t4 x) {
+  return __builtin_elementwise_popcount(x);
+}
+
+const inline uint countbits(uint64_t x) {
+  return __builtin_elementwise_popcount(x);
+}
+const inline uint2 countbits(uint64_t2 x) {
+  return __builtin_elementwise_popcount(x);
+}
+const inline uint3 countbits(uint64_t3 x) {
+  return __builtin_elementwise_popcount(x);
+}
+const inline uint4 countbits(uint64_t4 x) {
+  return __builtin_elementwise_popcount(x);
+}
 
 //===----------------------------------------------------------------------===//
 // degrees builtins
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 5f296d0a3324d00..65ad72bc479f49a 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -677,6 +677,11 @@ _storebe_i64(void * __P, long long __D) {
 #include <avx10_2_512satcvtintrin.h>
 #endif
 
+#if !defined(__SCE__) || __has_feature(modules) ||                             \
+    (defined(__AVX10_2_512__) && defined(__SM4__))
+#include <sm4evexintrin.h>
+#endif
+
 #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
 #include <enqcmdintrin.h>
 #endif
diff --git a/clang/lib/Headers/sm4evexintrin.h b/clang/lib/Headers/sm4evexintrin.h
new file mode 100644
index 000000000000000..f6ae0037baea033
--- /dev/null
+++ b/clang/lib/Headers/sm4evexintrin.h
@@ -0,0 +1,32 @@
+/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <sm4evexintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __SM4EVEXINTRIN_H
+#define __SM4EVEXINTRIN_H
+
+#define __DEFAULT_FN_ATTRS512                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("sm4,avx10.2-512"), __min_vector_width__(512)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B);
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif // __SM4EVEXINTRIN_H
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index 8826ab449df4930..052be1395161d4f 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -1582,7 +1582,6 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
     }
   }
 
-  FileInfo.IsLocallyIncluded = true;
   IsFirstIncludeOfFile = PP.markIncluded(File);
   return true;
 }
diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp
index 0a02a63deba3dc1..dc9d2bfd5629c95 100644
--- a/clang/lib/Lex/ModuleMap.cpp
+++ b/clang/lib/Lex/ModuleMap.cpp
@@ -472,12 +472,12 @@ static bool violatesPrivateInclude(Module *RequestingModule,
     // as obtained from the lookup and as obtained from the module.
     // This check is not cheap, so enable it only for debugging.
     bool IsPrivate = false;
-    SmallVectorImpl<Module::Header> *HeaderList[] = {
-        &Header.getModule()->Headers[Module::HK_Private],
-        &Header.getModule()->Headers[Module::HK_PrivateTextual]};
-    for (auto *Hs : HeaderList)
+    ArrayRef<Module::Header> HeaderList[] = {
+        Header.getModule()->getHeaders(Module::HK_Private),
+        Header.getModule()->getHeaders(Module::HK_PrivateTextual)};
+    for (auto Hs : HeaderList)
       IsPrivate |= llvm::any_of(
-          *Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; });
+          Hs, [&](const Module::Header &H) { return H.Entry == IncFileEnt; });
     assert(IsPrivate && "inconsistent headers and roles");
   }
 #endif
@@ -655,10 +655,9 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
         SmallString<32> NameBuf;
         StringRef Name = sanitizeFilenameAsIdentifier(
             llvm::sys::path::stem(SkippedDir.getName()), NameBuf);
-        Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
-                                    Explicit).first;
-        InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
-        Result->IsInferred = true;
+        Result = findOrCreateModuleFirst(Name, Result, /*IsFramework=*/false,
+                                         Explicit);
+        setInferredModuleAllowedBy(Result, UmbrellaModuleMap);
 
         // Associate the module and the directory.
         UmbrellaDirs[SkippedDir] = Result;
@@ -673,10 +672,9 @@ ModuleMap::findOrCreateModuleForHeaderInUmbrellaDir(FileEntryRef File) {
       SmallString<32> NameBuf;
       StringRef Name = sanitizeFilenameAsIdentifier(
                          llvm::sys::path::stem(File.getName()), NameBuf);
-      Result = findOrCreateModule(Name, Result, /*IsFramework=*/false,
-                                  Explicit).first;
-      InferredModuleAllowedBy[Result] = UmbrellaModuleMap;
-      Result->IsInferred = true;
+      Result = findOrCreateModuleFirst(Name, Result, /*IsFramework=*/false,
+                                       Explicit);
+      setInferredModuleAllowedBy(Result, UmbrellaModuleMap);
       Result->addTopHeader(File);
 
       // If inferred submodules export everything they import, add a
@@ -868,6 +866,15 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
     return std::make_pair(Sub, false);
 
   // Create a new module with this name.
+  Module *M = createModule(Name, Parent, IsFramework, IsExplicit);
+  return std::make_pair(M, true);
+}
+
+Module *ModuleMap::createModule(StringRef Name, Module *Parent,
+                                bool IsFramework, bool IsExplicit) {
+  assert(lookupModuleQualified(Name, Parent) == nullptr &&
+         "Creating duplicate submodule");
+
   Module *Result = new (ModulesAlloc.Allocate())
       Module(ModuleConstructorTag{}, Name, SourceLocation(), Parent,
              IsFramework, IsExplicit, NumCreatedModules++);
@@ -877,7 +884,7 @@ std::pair<Module *, bool> ModuleMap::findOrCreateModule(StringRef Name,
     Modules[Name] = Result;
     ModuleScopeIDs[Result] = CurrentModuleScopeID;
   }
-  return std::make_pair(Result, true);
+  return Result;
 }
 
 Module *ModuleMap::createGlobalModuleFragmentForModuleUnit(SourceLocation Loc,
@@ -1097,8 +1104,7 @@ Module *ModuleMap::inferFrameworkModule(DirectoryEntryRef FrameworkDir,
   Module *Result = new (ModulesAlloc.Allocate())
       Module(ModuleConstructorTag{}, ModuleName, SourceLocation(), Parent,
              /*IsFramework=*/true, /*IsExplicit=*/false, NumCreatedModules++);
-  InferredModuleAllowedBy[Result] = ModuleMapFID;
-  Result->IsInferred = true;
+  setInferredModuleAllowedBy(Result, ModuleMapFID);
   if (!Parent) {
     if (LangOpts.CurrentModule == ModuleName)
       SourceModule = Result;
@@ -1296,27 +1302,28 @@ void ModuleMap::addHeader(Module *Mod, Module::Header Header,
                           ModuleHeaderRole Role, bool Imported) {
   KnownHeader KH(Mod, Role);
 
+  FileEntryRef HeaderEntry = Header.Entry;
+
   // Only add each header to the headers list once.
   // FIXME: Should we diagnose if a header is listed twice in the
   // same module definition?
-  auto &HeaderList = Headers[Header.Entry];
+  auto &HeaderList = Headers[HeaderEntry];
   if (llvm::is_contained(HeaderList, KH))
     return;
 
   HeaderList.push_back(KH);
-  Mod->Headers[headerRoleToKind(Role)].push_back(Header);
+  Mod->addHeader(headerRoleToKind(Role), std::move(Header));
 
   bool isCompilingModuleHeader = Mod->isForBuilding(LangOpts);
   if (!Imported || isCompilingModuleHeader) {
     // When we import HeaderFileInfo, the external source is expected to
     // set the isModuleHeader flag itself.
-    HeaderInfo.MarkFileModuleHeader(Header.Entry, Role,
-                                    isCompilingModuleHeader);
+    HeaderInfo.MarkFileModuleHeader(HeaderEntry, Role, isCompilingModuleHeader);
   }
 
   // Notify callbacks that we just added a new header.
   for (const auto &Cb : Callbacks)
-    Cb->moduleMapAddHeader(Header.Entry.getName());
+    Cb->moduleMapAddHeader(HeaderEntry.getName());
 }
 
 FileID ModuleMap::getContainingModuleMapFileID(const Module *Module) const {
@@ -1345,7 +1352,7 @@ ModuleMap::getModuleMapFileForUniquing(const Module *M) const {
 }
 
 void ModuleMap::setInferredModuleAllowedBy(Module *M, FileID ModMapFID) {
-  assert(M->IsInferred && "module not inferred");
+  M->IsInferred = true;
   InferredModuleAllowedBy[M] = ModMapFID;
 }
 
@@ -2125,9 +2132,8 @@ void ModuleMapParser::parseModuleDecl() {
     ActiveModule =
         Map.createShadowedModule(ModuleName, Framework, ShadowingModule);
   } else {
-    ActiveModule =
-        Map.findOrCreateModule(ModuleName, ActiveModule, Framework, Explicit)
-            .first;
+    ActiveModule = Map.findOrCreateModuleFirst(ModuleName, ActiveModule,
+                                               Framework, Explicit);
   }
 
   ActiveModule->DefinitionLoc = ModuleNameLoc;
diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp
index 7d727efb228731c..6470e55e521add7 100644
--- a/clang/lib/Parse/ParseStmt.cpp
+++ b/clang/lib/Parse/ParseStmt.cpp
@@ -1243,6 +1243,7 @@ StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) {
       ParsedStmtContext::Compound |
       (isStmtExpr ? ParsedStmtContext::InStmtExpr : ParsedStmtContext());
 
+  bool LastIsError = false;
   while (!tryParseMisplacedModuleImport() && Tok.isNot(tok::r_brace) &&
          Tok.isNot(tok::eof)) {
     if (Tok.is(tok::annot_pragma_unused)) {
@@ -1299,7 +1300,15 @@ StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) {
 
     if (R.isUsable())
       Stmts.push_back(R.get());
+    LastIsError = R.isInvalid();
   }
+  // A StmtExpr needs to do copy initialization for its last statement.
+  // If the last statement parsed is invalid, it is not added to `Stmts`, so
+  // the last entry of `Stmts` is not the real last statement. Mark the whole
+  // compound statement as invalid to prevent follow-on errors.
+  if (isStmtExpr && LastIsError && !Stmts.empty())
+    return StmtError();
+
   // Warn the user that using option `-ffp-eval-method=source` on a
   // 32-bit target and feature `sse` disabled, or using
   // `pragma clang fp eval_method=source` and feature `sse` disabled, is not
diff --git a/clang/lib/Sema/CheckExprLifetime.cpp b/clang/lib/Sema/CheckExprLifetime.cpp
index aa0a2e223e708f0..357082fe329350d 100644
--- a/clang/lib/Sema/CheckExprLifetime.cpp
+++ b/clang/lib/Sema/CheckExprLifetime.cpp
@@ -472,7 +472,7 @@ shouldTrackFirstArgumentForConstructor(const CXXConstructExpr *Ctor) {
 }
 
 // Return true if this is an "normal" assignment operator.
-// We assuments that a normal assingment operator always returns *this, that is,
+// We assume that a normal assignment operator always returns *this, that is,
 // an lvalue reference that is the same type as the implicit object parameter
 // (or the LHS for a non-member operator$=).
 static bool isNormalAssignmentOperator(const FunctionDecl *FD) {
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 27b274d74ce716f..dae271c1ff50014 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -8899,18 +8899,41 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call,
           << ArgIdx << FnName << PointeeTy
           << Call->getCallee()->getSourceRange());
     else if (const auto *RT = PointeeTy->getAs<RecordType>()) {
+
+      // FIXME: Do not consider incomplete types even though they may be
+      // completed later. GCC does not diagnose such code, but we may want to
+      // consider diagnosing it in the future, perhaps under a different, but
+      // related, diagnostic group.
+      bool MayBeTriviallyCopyableCXXRecord =
+          RT->isIncompleteType() ||
+          RT->desugar().isTriviallyCopyableType(Context);
+
       if ((BId == Builtin::BImemset || BId == Builtin::BIbzero) &&
           RT->getDecl()->isNonTrivialToPrimitiveDefaultInitialize()) {
         DiagRuntimeBehavior(Dest->getExprLoc(), Dest,
                             PDiag(diag::warn_cstruct_memaccess)
                                 << ArgIdx << FnName << PointeeTy << 0);
         SearchNonTrivialToInitializeField::diag(PointeeTy, Dest, *this);
+      } else if ((BId == Builtin::BImemset || BId == Builtin::BIbzero) &&
+                 !MayBeTriviallyCopyableCXXRecord && ArgIdx == 0) {
+        // FIXME: Limiting this warning to dest argument until we decide
+        // whether it's valid for source argument too.
+        DiagRuntimeBehavior(Dest->getExprLoc(), Dest,
+                            PDiag(diag::warn_cxxstruct_memaccess)
+                                << FnName << PointeeTy);
       } else if ((BId == Builtin::BImemcpy || BId == Builtin::BImemmove) &&
                  RT->getDecl()->isNonTrivialToPrimitiveCopy()) {
         DiagRuntimeBehavior(Dest->getExprLoc(), Dest,
                             PDiag(diag::warn_cstruct_memaccess)
                                 << ArgIdx << FnName << PointeeTy << 1);
         SearchNonTrivialToCopyField::diag(PointeeTy, Dest, *this);
+      } else if ((BId == Builtin::BImemcpy || BId == Builtin::BImemmove) &&
+                 !MayBeTriviallyCopyableCXXRecord && ArgIdx == 0) {
+        // FIXME: Limiting this warning to dest argument until we decide
+        // whether it's valid for source argument too.
+        DiagRuntimeBehavior(Dest->getExprLoc(), Dest,
+                            PDiag(diag::warn_cxxstruct_memaccess)
+                                << FnName << PointeeTy);
       } else {
         continue;
       }
@@ -9574,16 +9597,23 @@ static QualType GetExprType(const Expr *E) {
   return Ty;
 }
 
-/// Pseudo-evaluate the given integer expression, estimating the
-/// range of values it might take.
+/// Attempts to estimate an approximate range for the given integer expression.
+/// Returns the range if successful; otherwise returns \c std::nullopt when a
+/// reliable estimation cannot be determined.
 ///
 /// \param MaxWidth The width to which the value will be truncated.
-/// \param Approximate If \c true, return a likely range for the result: in
-///        particular, assume that arithmetic on narrower types doesn't leave
-///        those types. If \c false, return a range including all possible
-///        result values.
-static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth,
-                             bool InConstantContext, bool Approximate) {
+/// \param InConstantContext If \c true, interpret the expression within a
+///        constant context.
+/// \param Approximate If \c true, provide a likely range of values by assuming
+///        that arithmetic on narrower types remains within those types.
+///        If \c false, return a range that includes all possible values
+///        resulting from the expression.
+/// \returns A range of values that the expression might take, or
+///          std::nullopt if a reliable estimation cannot be determined.
+static std::optional<IntRange> TryGetExprRange(ASTContext &C, const Expr *E,
+                                               unsigned MaxWidth,
+                                               bool InConstantContext,
+                                               bool Approximate) {
   E = E->IgnoreParens();
 
   // Try a full evaluation first.
@@ -9596,8 +9626,8 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth,
   // being of the new, wider type.
   if (const auto *CE = dyn_cast<ImplicitCastExpr>(E)) {
     if (CE->getCastKind() == CK_NoOp || CE->getCastKind() == CK_LValueToRValue)
-      return GetExprRange(C, CE->getSubExpr(), MaxWidth, InConstantContext,
-                          Approximate);
+      return TryGetExprRange(C, CE->getSubExpr(), MaxWidth, InConstantContext,
+                             Approximate);
 
     IntRange OutputTypeRange = IntRange::forValueOfType(C, GetExprType(CE));
 
@@ -9608,40 +9638,52 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth,
     if (!isIntegerCast)
       return OutputTypeRange;
 
-    IntRange SubRange = GetExprRange(C, CE->getSubExpr(),
-                                     std::min(MaxWidth, OutputTypeRange.Width),
-                                     InConstantContext, Approximate);
+    std::optional<IntRange> SubRange = TryGetExprRange(
+        C, CE->getSubExpr(), std::min(MaxWidth, OutputTypeRange.Width),
+        InConstantContext, Approximate);
+    if (!SubRange)
+      return std::nullopt;
 
     // Bail out if the subexpr's range is as wide as the cast type.
-    if (SubRange.Width >= OutputTypeRange.Width)
+    if (SubRange->Width >= OutputTypeRange.Width)
       return OutputTypeRange;
 
     // Otherwise, we take the smaller width, and we're non-negative if
     // either the output type or the subexpr is.
-    return IntRange(SubRange.Width,
-                    SubRange.NonNegative || OutputTypeRange.NonNegative);
+    return IntRange(SubRange->Width,
+                    SubRange->NonNegative || OutputTypeRange.NonNegative);
   }
 
   if (const auto *CO = dyn_cast<ConditionalOperator>(E)) {
     // If we can fold the condition, just take that operand.
     bool CondResult;
     if (CO->getCond()->EvaluateAsBooleanCondition(CondResult, C))
-      return GetExprRange(C,
-                          CondResult ? CO->getTrueExpr() : CO->getFalseExpr(),
-                          MaxWidth, InConstantContext, Approximate);
+      return TryGetExprRange(
+          C, CondResult ? CO->getTrueExpr() : CO->getFalseExpr(), MaxWidth,
+          InConstantContext, Approximate);
 
     // Otherwise, conservatively merge.
-    // GetExprRange requires an integer expression, but a throw expression
+    // TryGetExprRange requires an integer expression, but a throw expression
     // results in a void type.
-    Expr *E = CO->getTrueExpr();
-    IntRange L = E->getType()->isVoidType()
-                     ? IntRange{0, true}
-                     : GetExprRange(C, E, MaxWidth, InConstantContext, Approximate);
-    E = CO->getFalseExpr();
-    IntRange R = E->getType()->isVoidType()
-                     ? IntRange{0, true}
-                     : GetExprRange(C, E, MaxWidth, InConstantContext, Approximate);
-    return IntRange::join(L, R);
+    Expr *TrueExpr = CO->getTrueExpr();
+    if (TrueExpr->getType()->isVoidType())
+      return std::nullopt;
+
+    std::optional<IntRange> L =
+        TryGetExprRange(C, TrueExpr, MaxWidth, InConstantContext, Approximate);
+    if (!L)
+      return std::nullopt;
+
+    Expr *FalseExpr = CO->getFalseExpr();
+    if (FalseExpr->getType()->isVoidType())
+      return std::nullopt;
+
+    std::optional<IntRange> R =
+        TryGetExprRange(C, FalseExpr, MaxWidth, InConstantContext, Approximate);
+    if (!R)
+      return std::nullopt;
+
+    return IntRange::join(*L, *R);
   }
 
   if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
@@ -9678,8 +9720,8 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth,
     // been coerced to the LHS type.
     case BO_Assign:
       // TODO: bitfields?
-      return GetExprRange(C, BO->getRHS(), MaxWidth, InConstantContext,
-                          Approximate);
+      return TryGetExprRange(C, BO->getRHS(), MaxWidth, InConstantContext,
+                             Approximate);
 
     // Operations with opaque sources are black-listed.
     case BO_PtrMemD:
@@ -9711,18 +9753,20 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth,
     // Right shift by a constant can narrow its left argument.
     case BO_Shr:
     case BO_ShrAssign: {
-      IntRange L = GetExprRange(C, BO->getLHS(), MaxWidth, InConstantContext,
-                                Approximate);
+      std::optional<IntRange> L = TryGetExprRange(
+          C, BO->getLHS(), MaxWidth, InConstantContext, Approximate);
+      if (!L)
+        return std::nullopt;
 
       // If the shift amount is a positive constant, drop the width by
       // that much.
       if (std::optional<llvm::APSInt> shift =
               BO->getRHS()->getIntegerConstantExpr(C)) {
         if (shift->isNonNegative()) {
-          if (shift->uge(L.Width))
-            L.Width = (L.NonNegative ? 0 : 1);
+          if (shift->uge(L->Width))
+            L->Width = (L->NonNegative ? 0 : 1);
           else
-            L.Width -= shift->getZExtValue();
+            L->Width -= shift->getZExtValue();
         }
       }
 
@@ -9731,8 +9775,8 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth,
 
     // Comma acts as its right operand.
     case BO_Comma:
-      return GetExprRange(C, BO->getRHS(), MaxWidth, InConstantContext,
-                          Approximate);
+      return TryGetExprRange(C, BO->getRHS(), MaxWidth, InConstantContext,
+                             Approximate);
 
     case BO_Add:
       if (!Approximate)
@@ -9756,26 +9800,31 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth,
     case BO_Div: {
       // Don't 'pre-truncate' the operands.
       unsigned opWidth = C.getIntWidth(GetExprType(E));
-      IntRange L = GetExprRange(C, BO->getLHS(), opWidth, InConstantContext,
-                                Approximate);
+      std::optional<IntRange> L = TryGetExprRange(
+          C, BO->getLHS(), opWidth, InConstantContext, Approximate);
+      if (!L)
+        return std::nullopt;
 
       // If the divisor is constant, use that.
       if (std::optional<llvm::APSInt> divisor =
               BO->getRHS()->getIntegerConstantExpr(C)) {
         unsigned log2 = divisor->logBase2(); // floor(log_2(divisor))
-        if (log2 >= L.Width)
-          L.Width = (L.NonNegative ? 0 : 1);
+        if (log2 >= L->Width)
+          L->Width = (L->NonNegative ? 0 : 1);
         else
-          L.Width = std::min(L.Width - log2, MaxWidth);
+          L->Width = std::min(L->Width - log2, MaxWidth);
         return L;
       }
 
       // Otherwise, just use the LHS's width.
       // FIXME: This is wrong if the LHS could be its minimal value and the RHS
       // could be -1.
-      IntRange R = GetExprRange(C, BO->getRHS(), opWidth, InConstantContext,
-                                Approximate);
-      return IntRange(L.Width, L.NonNegative && R.NonNegative);
+      std::optional<IntRange> R = TryGetExprRange(
+          C, BO->getRHS(), opWidth, InConstantContext, Approximate);
+      if (!R)
+        return std::nullopt;
+
+      return IntRange(L->Width, L->NonNegative && R->NonNegative);
     }
 
     case BO_Rem:
@@ -9792,11 +9841,17 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth,
     // performed the computation.
     QualType T = GetExprType(E);
     unsigned opWidth = C.getIntWidth(T);
-    IntRange L =
-        GetExprRange(C, BO->getLHS(), opWidth, InConstantContext, Approximate);
-    IntRange R =
-        GetExprRange(C, BO->getRHS(), opWidth, InConstantContext, Approximate);
-    IntRange C = Combine(L, R);
+    std::optional<IntRange> L = TryGetExprRange(C, BO->getLHS(), opWidth,
+                                                InConstantContext, Approximate);
+    if (!L)
+      return std::nullopt;
+
+    std::optional<IntRange> R = TryGetExprRange(C, BO->getRHS(), opWidth,
+                                                InConstantContext, Approximate);
+    if (!R)
+      return std::nullopt;
+
+    IntRange C = Combine(*L, *R);
     C.NonNegative |= T->isUnsignedIntegerOrEnumerationType();
     C.Width = std::min(C.Width, MaxWidth);
     return C;
@@ -9814,26 +9869,30 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth,
       return IntRange::forValueOfType(C, GetExprType(E));
 
     default:
-      return GetExprRange(C, UO->getSubExpr(), MaxWidth, InConstantContext,
-                          Approximate);
+      return TryGetExprRange(C, UO->getSubExpr(), MaxWidth, InConstantContext,
+                             Approximate);
     }
   }
 
   if (const auto *OVE = dyn_cast<OpaqueValueExpr>(E))
-    return GetExprRange(C, OVE->getSourceExpr(), MaxWidth, InConstantContext,
-                        Approximate);
+    return TryGetExprRange(C, OVE->getSourceExpr(), MaxWidth, InConstantContext,
+                           Approximate);
 
   if (const auto *BitField = E->getSourceBitField())
     return IntRange(BitField->getBitWidthValue(C),
                     BitField->getType()->isUnsignedIntegerOrEnumerationType());
 
+  if (GetExprType(E)->isVoidType())
+    return std::nullopt;
+
   return IntRange::forValueOfType(C, GetExprType(E));
 }
 
-static IntRange GetExprRange(ASTContext &C, const Expr *E,
-                             bool InConstantContext, bool Approximate) {
-  return GetExprRange(C, E, C.getIntWidth(GetExprType(E)), InConstantContext,
-                      Approximate);
+static std::optional<IntRange> TryGetExprRange(ASTContext &C, const Expr *E,
+                                               bool InConstantContext,
+                                               bool Approximate) {
+  return TryGetExprRange(C, E, C.getIntWidth(GetExprType(E)), InConstantContext,
+                         Approximate);
 }
 
 /// Checks whether the given value, which currently has the given
@@ -10078,8 +10137,10 @@ static bool CheckTautologicalComparison(Sema &S, BinaryOperator *E,
       S.Context.hasSameUnqualifiedType(Constant->getType(), Other->getType()))
     return false;
 
-  IntRange OtherValueRange = GetExprRange(
+  std::optional<IntRange> OtherValueRange = TryGetExprRange(
       S.Context, Other, S.isConstantEvaluatedContext(), /*Approximate=*/false);
+  if (!OtherValueRange)
+    return false;
 
   QualType OtherT = Other->getType();
   if (const auto *AT = OtherT->getAs<AtomicType>())
@@ -10097,11 +10158,11 @@ static bool CheckTautologicalComparison(Sema &S, BinaryOperator *E,
   bool OtherIsBooleanDespiteType =
       !OtherT->isBooleanType() && Other->isKnownToHaveBooleanValue();
   if (OtherIsBooleanDespiteType || IsObjCSignedCharBool)
-    OtherTypeRange = OtherValueRange = IntRange::forBoolType();
+    OtherTypeRange = *OtherValueRange = IntRange::forBoolType();
 
   // Check if all values in the range of possible values of this expression
   // lead to the same comparison outcome.
-  PromotedRange OtherPromotedValueRange(OtherValueRange, Value.getBitWidth(),
+  PromotedRange OtherPromotedValueRange(*OtherValueRange, Value.getBitWidth(),
                                         Value.isUnsigned());
   auto Cmp = OtherPromotedValueRange.compare(Value);
   auto Result = PromotedRange::constantValue(E->getOpcode(), Cmp, RhsConstant);
@@ -10125,7 +10186,7 @@ static bool CheckTautologicalComparison(Sema &S, BinaryOperator *E,
 
   // Don't warn if the non-constant operand actually always evaluates to the
   // same value.
-  if (!TautologicalTypeCompare && OtherValueRange.Width == 0)
+  if (!TautologicalTypeCompare && OtherValueRange->Width == 0)
     return false;
 
   // Suppress the diagnostic for an in-range comparison if the constant comes
@@ -10164,7 +10225,7 @@ static bool CheckTautologicalComparison(Sema &S, BinaryOperator *E,
 
   if (!TautologicalTypeCompare) {
     S.Diag(E->getOperatorLoc(), diag::warn_tautological_compare_value_range)
-        << RhsConstant << OtherValueRange.Width << OtherValueRange.NonNegative
+        << RhsConstant << OtherValueRange->Width << OtherValueRange->NonNegative
         << E->getOpcodeStr() << OS.str() << *Result
         << E->getLHS()->getSourceRange() << E->getRHS()->getSourceRange();
     return true;
@@ -10294,9 +10355,11 @@ static void AnalyzeComparison(Sema &S, BinaryOperator *E) {
   }
 
   // Otherwise, calculate the effective range of the signed operand.
-  IntRange signedRange =
-      GetExprRange(S.Context, signedOperand, S.isConstantEvaluatedContext(),
-                   /*Approximate=*/true);
+  std::optional<IntRange> signedRange =
+      TryGetExprRange(S.Context, signedOperand, S.isConstantEvaluatedContext(),
+                      /*Approximate=*/true);
+  if (!signedRange)
+    return;
 
   // Go ahead and analyze implicit conversions in the operands.  Note
   // that we skip the implicit conversions on both sides.
@@ -10304,7 +10367,7 @@ static void AnalyzeComparison(Sema &S, BinaryOperator *E) {
   AnalyzeImplicitConversions(S, RHS, E->getOperatorLoc());
 
   // If the signed range is non-negative, -Wsign-compare won't fire.
-  if (signedRange.NonNegative)
+  if (signedRange->NonNegative)
     return;
 
   // For (in)equality comparisons, if the unsigned operand is a
@@ -10313,15 +10376,17 @@ static void AnalyzeComparison(Sema &S, BinaryOperator *E) {
   // change the result of the comparison.
   if (E->isEqualityOp()) {
     unsigned comparisonWidth = S.Context.getIntWidth(T);
-    IntRange unsignedRange =
-        GetExprRange(S.Context, unsignedOperand, S.isConstantEvaluatedContext(),
-                     /*Approximate=*/true);
+    std::optional<IntRange> unsignedRange = TryGetExprRange(
+        S.Context, unsignedOperand, S.isConstantEvaluatedContext(),
+        /*Approximate=*/true);
+    if (!unsignedRange)
+      return;
 
     // We should never be unable to prove that the unsigned operand is
     // non-negative.
-    assert(unsignedRange.NonNegative && "unsigned range includes negative?");
+    assert(unsignedRange->NonNegative && "unsigned range includes negative?");
 
-    if (unsignedRange.Width < comparisonWidth)
+    if (unsignedRange->Width < comparisonWidth)
       return;
   }
 
@@ -11128,10 +11193,12 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC,
   if (SourceBT && TargetBT && SourceBT->isIntegerType() &&
       TargetBT->isFloatingType() && !IsListInit) {
     // Determine the number of precision bits in the source integer type.
-    IntRange SourceRange =
-        GetExprRange(Context, E, isConstantEvaluatedContext(),
-                     /*Approximate=*/true);
-    unsigned int SourcePrecision = SourceRange.Width;
+    std::optional<IntRange> SourceRange =
+        TryGetExprRange(Context, E, isConstantEvaluatedContext(),
+                        /*Approximate=*/true);
+    if (!SourceRange)
+      return;
+    unsigned int SourcePrecision = SourceRange->Width;
 
     // Determine the number of precision bits in the
     // target floating point type.
@@ -11194,14 +11261,16 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC,
         E, Diag(CC, diag::warn_impcast_int_to_objc_signed_char_bool)
                << E->getType());
   }
+  std::optional<IntRange> LikelySourceRange = TryGetExprRange(
+      Context, E, isConstantEvaluatedContext(), /*Approximate=*/true);
+  if (!LikelySourceRange)
+    return;
 
   IntRange SourceTypeRange =
       IntRange::forTargetOfCanonicalType(Context, Source);
-  IntRange LikelySourceRange = GetExprRange(
-      Context, E, isConstantEvaluatedContext(), /*Approximate=*/true);
   IntRange TargetRange = IntRange::forTargetOfCanonicalType(Context, Target);
 
-  if (LikelySourceRange.Width > TargetRange.Width) {
+  if (LikelySourceRange->Width > TargetRange.Width) {
     // If the source is a constant, use a default-on diagnostic.
     // TODO: this should happen for bitfield stores, too.
     Expr::EvalResult Result;
@@ -11248,8 +11317,8 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC,
         }
   }
 
-  if (TargetRange.Width == LikelySourceRange.Width &&
-      !TargetRange.NonNegative && LikelySourceRange.NonNegative &&
+  if (TargetRange.Width == LikelySourceRange->Width &&
+      !TargetRange.NonNegative && LikelySourceRange->NonNegative &&
       Source->isSignedIntegerType()) {
     // Warn when doing a signed to signed conversion, warn if the positive
     // source value is exactly the width of the target type, which will
@@ -11275,9 +11344,9 @@ void Sema::CheckImplicitConversion(Expr *E, QualType T, SourceLocation CC,
   }
 
   if ((!isa<EnumType>(Target) || !isa<EnumType>(Source)) &&
-      ((TargetRange.NonNegative && !LikelySourceRange.NonNegative) ||
-       (!TargetRange.NonNegative && LikelySourceRange.NonNegative &&
-        LikelySourceRange.Width == TargetRange.Width))) {
+      ((TargetRange.NonNegative && !LikelySourceRange->NonNegative) ||
+       (!TargetRange.NonNegative && LikelySourceRange->NonNegative &&
+        LikelySourceRange->Width == TargetRange.Width))) {
     if (SourceMgr.isInSystemMacro(CC))
       return;
 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 25061f02c13f6ca..f8e5f3c6d309d67 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -6940,7 +6940,7 @@ static void checkAttributesAfterMerging(Sema &S, NamedDecl &ND) {
     }
   }
 
-  // Check the attributes on the function type, if any.
+  // Check the attributes on the function type and function params, if any.
   if (const auto *FD = dyn_cast<FunctionDecl>(&ND)) {
     // Don't declare this variable in the second operand of the for-statement;
     // GCC miscompiles that by ending its lifetime before evaluating the
@@ -6970,6 +6970,18 @@ static void checkAttributesAfterMerging(Sema &S, NamedDecl &ND) {
         }
       }
     }
+
+    for (unsigned int I = 0; I < FD->getNumParams(); ++I) {
+      const ParmVarDecl *P = FD->getParamDecl(I);
+
+      // The [[lifetimebound]] attribute can be applied to a function parameter
+      // only if the function returns a value.
+      if (auto *A = P->getAttr<LifetimeBoundAttr>()) {
+        if (!isa<CXXConstructorDecl>(FD) && FD->getReturnType()->isVoidType()) {
+          S.Diag(A->getLocation(), diag::err_lifetimebound_void_return_type);
+        }
+      }
+    }
   }
 }
 
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 1f6c5b8d4561bcd..a472538236e2d91 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -1698,18 +1698,27 @@ static bool CheckVectorElementCallArgs(Sema *S, CallExpr *TheCall) {
   return true;
 }
 
-static bool CheckArgsTypesAreCorrect(
+bool CheckArgTypeIsCorrect(
+    Sema *S, Expr *Arg, QualType ExpectedType,
+    llvm::function_ref<bool(clang::QualType PassedType)> Check) {
+  QualType PassedType = Arg->getType();
+  if (Check(PassedType)) {
+    if (auto *VecTyA = PassedType->getAs<VectorType>())
+      ExpectedType = S->Context.getVectorType(
+          ExpectedType, VecTyA->getNumElements(), VecTyA->getVectorKind());
+    S->Diag(Arg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
+        << PassedType << ExpectedType << 1 << 0 << 0;
+    return true;
+  }
+  return false;
+}
+
+bool CheckAllArgTypesAreCorrect(
     Sema *S, CallExpr *TheCall, QualType ExpectedType,
     llvm::function_ref<bool(clang::QualType PassedType)> Check) {
   for (unsigned i = 0; i < TheCall->getNumArgs(); ++i) {
-    QualType PassedType = TheCall->getArg(i)->getType();
-    if (Check(PassedType)) {
-      if (auto *VecTyA = PassedType->getAs<VectorType>())
-        ExpectedType = S->Context.getVectorType(
-            ExpectedType, VecTyA->getNumElements(), VecTyA->getVectorKind());
-      S->Diag(TheCall->getArg(0)->getBeginLoc(),
-              diag::err_typecheck_convert_incompatible)
-          << PassedType << ExpectedType << 1 << 0 << 0;
+    Expr *Arg = TheCall->getArg(i);
+    if (CheckArgTypeIsCorrect(S, Arg, ExpectedType, Check)) {
       return true;
     }
   }
@@ -1720,8 +1729,8 @@ static bool CheckAllArgsHaveFloatRepresentation(Sema *S, CallExpr *TheCall) {
   auto checkAllFloatTypes = [](clang::QualType PassedType) -> bool {
     return !PassedType->hasFloatingRepresentation();
   };
-  return CheckArgsTypesAreCorrect(S, TheCall, S->Context.FloatTy,
-                                  checkAllFloatTypes);
+  return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.FloatTy,
+                                    checkAllFloatTypes);
 }
 
 static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
@@ -1732,8 +1741,19 @@ static bool CheckFloatOrHalfRepresentations(Sema *S, CallExpr *TheCall) {
             : PassedType;
     return !BaseType->isHalfType() && !BaseType->isFloat32Type();
   };
-  return CheckArgsTypesAreCorrect(S, TheCall, S->Context.FloatTy,
-                                  checkFloatorHalf);
+  return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.FloatTy,
+                                    checkFloatorHalf);
+}
+
+static bool CheckModifiableLValue(Sema *S, CallExpr *TheCall,
+                                  unsigned ArgIndex) {
+  auto *Arg = TheCall->getArg(ArgIndex);
+  SourceLocation OrigLoc = Arg->getExprLoc();
+  if (Arg->IgnoreCasts()->isModifiableLvalue(S->Context, &OrigLoc) ==
+      Expr::MLV_Valid)
+    return false;
+  S->Diag(OrigLoc, diag::error_hlsl_inout_lvalue) << Arg << 0;
+  return true;
 }
 
 static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall) {
@@ -1742,24 +1762,24 @@ static bool CheckNoDoubleVectors(Sema *S, CallExpr *TheCall) {
       return VecTy->getElementType()->isDoubleType();
     return false;
   };
-  return CheckArgsTypesAreCorrect(S, TheCall, S->Context.FloatTy,
-                                  checkDoubleVector);
+  return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.FloatTy,
+                                    checkDoubleVector);
 }
 static bool CheckFloatingOrIntRepresentation(Sema *S, CallExpr *TheCall) {
   auto checkAllSignedTypes = [](clang::QualType PassedType) -> bool {
     return !PassedType->hasIntegerRepresentation() &&
            !PassedType->hasFloatingRepresentation();
   };
-  return CheckArgsTypesAreCorrect(S, TheCall, S->Context.IntTy,
-                                  checkAllSignedTypes);
+  return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.IntTy,
+                                    checkAllSignedTypes);
 }
 
 static bool CheckUnsignedIntRepresentation(Sema *S, CallExpr *TheCall) {
   auto checkAllUnsignedTypes = [](clang::QualType PassedType) -> bool {
     return !PassedType->hasUnsignedIntegerRepresentation();
   };
-  return CheckArgsTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy,
-                                  checkAllUnsignedTypes);
+  return CheckAllArgTypesAreCorrect(S, TheCall, S->Context.UnsignedIntTy,
+                                    checkAllUnsignedTypes);
 }
 
 static void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall,
@@ -2074,6 +2094,22 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
       return true;
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_splitdouble: {
+    if (SemaRef.checkArgCount(TheCall, 3))
+      return true;
+
+    if (CheckScalarOrVector(&SemaRef, TheCall, SemaRef.Context.DoubleTy, 0) ||
+        CheckScalarOrVector(&SemaRef, TheCall, SemaRef.Context.UnsignedIntTy,
+                            1) ||
+        CheckScalarOrVector(&SemaRef, TheCall, SemaRef.Context.UnsignedIntTy,
+                            2))
+      return true;
+
+    if (CheckModifiableLValue(&SemaRef, TheCall, 1) ||
+        CheckModifiableLValue(&SemaRef, TheCall, 2))
+      return true;
+    break;
+  }
   case Builtin::BI__builtin_elementwise_acos:
   case Builtin::BI__builtin_elementwise_asin:
   case Builtin::BI__builtin_elementwise_atan:
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index db1d7fa237131a8..b45f30fed49a647 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -6163,7 +6163,7 @@ struct TemplateArgumentListAreEqual {
             std::enable_if_t<!std::is_same_v<T1, T2>, bool> = true>
   bool operator()(T1 *Spec, T2 *Primary) {
     ArrayRef<TemplateArgument> Args1 = Spec->getTemplateArgs().asArray(),
-                               Args2 = Primary->getInjectedTemplateArgs();
+                               Args2 = Primary->getInjectedTemplateArgs(Ctx);
 
     for (unsigned I = 0, E = Args1.size(); I < E; ++I) {
       // We use profile, instead of structural comparison of the arguments,
@@ -6342,7 +6342,7 @@ bool Sema::isMoreSpecializedThanPrimary(
   VarTemplateDecl *Primary = Spec->getSpecializedTemplate();
   TemplateName Name(Primary);
   QualType PrimaryT = Context.getTemplateSpecializationType(
-      Name, Primary->getInjectedTemplateArgs());
+      Name, Primary->getInjectedTemplateArgs(Context));
   QualType PartialT = Context.getTemplateSpecializationType(
       Name, Spec->getTemplateArgs().asArray());
 
@@ -6372,18 +6372,14 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs(
   //    - Each function template has a single function parameter whose type is
   //      a specialization of X with template arguments corresponding to the
   //      template parameters from the respective function template
-  SmallVector<TemplateArgument, 8> AArgs;
-  Context.getInjectedTemplateArgs(A, AArgs);
+  SmallVector<TemplateArgument, 8> AArgs(A->getInjectedTemplateArgs(Context));
 
   // Check P's arguments against A's parameter list. This will fill in default
   // template arguments as needed. AArgs are already correct by construction.
   // We can't just use CheckTemplateIdType because that will expand alias
   // templates.
-  SmallVector<TemplateArgument, 4> PArgs;
+  SmallVector<TemplateArgument, 4> PArgs(P->getInjectedTemplateArgs(Context));
   {
-    SFINAETrap Trap(*this);
-
-    Context.getInjectedTemplateArgs(P, PArgs);
     TemplateArgumentListInfo PArgList(P->getLAngleLoc(),
                                       P->getRAngleLoc());
     for (unsigned I = 0, N = P->size(); I != N; ++I) {
@@ -6399,6 +6395,7 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs(
     }
     PArgs.clear();
 
+    SFINAETrap Trap(*this);
     // C++1z [temp.arg.template]p3:
     //   If the rewrite produces an invalid type, then P is not at least as
     //   specialized as A.
diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp
index 6a55861fe5af3b1..dea97bfce532c9d 100644
--- a/clang/lib/Sema/SemaTemplateInstantiate.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp
@@ -200,7 +200,7 @@ struct TemplateInstantiationArgumentCollecter
     if (Innermost)
       AddInnermostTemplateArguments(FTD);
     else if (ForConstraintInstantiation)
-      AddOuterTemplateArguments(FTD, FTD->getInjectedTemplateArgs(),
+      AddOuterTemplateArguments(FTD, FTD->getInjectedTemplateArgs(S.Context),
                                 /*Final=*/false);
 
     if (FTD->isMemberSpecialization())
@@ -219,7 +219,7 @@ struct TemplateInstantiationArgumentCollecter
     if (Innermost)
       AddInnermostTemplateArguments(VTD);
     else if (ForConstraintInstantiation)
-      AddOuterTemplateArguments(VTD, VTD->getInjectedTemplateArgs(),
+      AddOuterTemplateArguments(VTD, VTD->getInjectedTemplateArgs(S.Context),
                                 /*Final=*/false);
 
     if (VTD->isMemberSpecialization())
@@ -237,7 +237,8 @@ struct TemplateInstantiationArgumentCollecter
     if (Innermost)
       AddInnermostTemplateArguments(VTPSD);
     else if (ForConstraintInstantiation)
-      AddOuterTemplateArguments(VTPSD, VTPSD->getInjectedTemplateArgs(),
+      AddOuterTemplateArguments(VTPSD,
+                                VTPSD->getInjectedTemplateArgs(S.Context),
                                 /*Final=*/false);
 
     if (VTPSD->isMemberSpecialization())
@@ -254,7 +255,7 @@ struct TemplateInstantiationArgumentCollecter
     if (Innermost)
       AddInnermostTemplateArguments(CTD);
     else if (ForConstraintInstantiation)
-      AddOuterTemplateArguments(CTD, CTD->getInjectedTemplateArgs(),
+      AddOuterTemplateArguments(CTD, CTD->getInjectedTemplateArgs(S.Context),
                                 /*Final=*/false);
 
     if (CTD->isMemberSpecialization())
@@ -274,7 +275,8 @@ struct TemplateInstantiationArgumentCollecter
     if (Innermost)
       AddInnermostTemplateArguments(CTPSD);
     else if (ForConstraintInstantiation)
-      AddOuterTemplateArguments(CTPSD, CTPSD->getInjectedTemplateArgs(),
+      AddOuterTemplateArguments(CTPSD,
+                                CTPSD->getInjectedTemplateArgs(S.Context),
                                 /*Final=*/false);
 
     if (CTPSD->isMemberSpecialization())
@@ -290,7 +292,7 @@ struct TemplateInstantiationArgumentCollecter
     if (Innermost)
       AddInnermostTemplateArguments(TATD);
     else if (ForConstraintInstantiation)
-      AddOuterTemplateArguments(TATD, TATD->getInjectedTemplateArgs(),
+      AddOuterTemplateArguments(TATD, TATD->getInjectedTemplateArgs(S.Context),
                                 /*Final=*/false);
 
     return UseNextDecl(TATD);
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 7d9170e7f0b4797..8d8f9378cfeabee 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -5756,6 +5756,14 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
     return Err;
 
   ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
+  bool KnowsTopLevelModule = ModMap.findModule(F.ModuleName) != nullptr;
+  // If we don't know the top-level module, there's no point in doing qualified
+  // lookup of its submodules; it won't find anything anywhere within this tree.
+  // Let's skip that and avoid some string lookups.
+  auto CreateModule = !KnowsTopLevelModule
+                          ? &ModuleMap::createModule
+                          : &ModuleMap::findOrCreateModuleFirst;
+
   bool First = true;
   Module *CurrentModule = nullptr;
   RecordData Record;
@@ -5813,6 +5821,7 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       SubmoduleID Parent = getGlobalSubmoduleID(F, Record[Idx++]);
       Module::ModuleKind Kind = (Module::ModuleKind)Record[Idx++];
       SourceLocation DefinitionLoc = ReadSourceLocation(F, Record[Idx++]);
+      FileID InferredAllowedBy = ReadFileID(F, Record, Idx);
       bool IsFramework = Record[Idx++];
       bool IsExplicit = Record[Idx++];
       bool IsSystem = Record[Idx++];
@@ -5828,13 +5837,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       if (Parent)
         ParentModule = getSubmodule(Parent);
 
-      // Retrieve this (sub)module from the module map, creating it if
-      // necessary.
-      CurrentModule =
-          ModMap.findOrCreateModule(Name, ParentModule, IsFramework, IsExplicit)
-              .first;
-
-      // FIXME: Call ModMap.setInferredModuleAllowedBy()
+      CurrentModule = std::invoke(CreateModule, &ModMap, Name, ParentModule,
+                                  IsFramework, IsExplicit);
 
       SubmoduleID GlobalIndex = GlobalID - NUM_PREDEF_SUBMODULE_IDS;
       if (GlobalIndex >= SubmodulesLoaded.size() ||
@@ -5866,6 +5870,8 @@ llvm::Error ASTReader::ReadSubmoduleBlock(ModuleFile &F,
       CurrentModule->DefinitionLoc = DefinitionLoc;
       CurrentModule->Signature = F.Signature;
       CurrentModule->IsFromModuleFile = true;
+      if (InferredAllowedBy.isValid())
+        ModMap.setInferredModuleAllowedBy(CurrentModule, InferredAllowedBy);
       CurrentModule->IsSystem = IsSystem || CurrentModule->IsSystem;
       CurrentModule->IsExternC = IsExternC;
       CurrentModule->InferSubmodules = InferSubmodules;
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 494890284d2f2c1..569c688f793d81a 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -2163,8 +2163,8 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
       continue; // We have no information on this being a header file.
     if (!HFI->isCompilingModuleHeader && HFI->isModuleHeader)
       continue; // Header file info is tracked by the owning module file.
-    if (!HFI->isCompilingModuleHeader && !PP->alreadyIncluded(*File))
-      continue; // Non-modular header not included is not needed.
+    if (!HFI->isCompilingModuleHeader && !HFI->IsLocallyIncluded)
+      continue; // Header file info is tracked by the including module file.
 
     // Massage the file path into an appropriate form.
     StringRef Filename = File->getName();
@@ -2176,7 +2176,7 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) {
       SavedStrings.push_back(Filename.data());
     }
 
-    bool Included = PP->alreadyIncluded(*File);
+    bool Included = HFI->IsLocallyIncluded || PP->alreadyIncluded(*File);
 
     HeaderFileInfoTrait::key_type Key = {
       Filename, File->getSize(), getTimestampForOutput(*File)
@@ -2914,6 +2914,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Parent
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // Kind
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // Definition location
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Inferred allowed by
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsExplicit
   Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsSystem
@@ -3018,6 +3019,12 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {
     SourceLocationEncoding::RawLocEncoding DefinitionLoc =
         getRawSourceLocationEncoding(getAdjustedLocation(Mod->DefinitionLoc));
 
+    ModuleMap &ModMap = PP->getHeaderSearchInfo().getModuleMap();
+    FileID UnadjustedInferredFID;
+    if (Mod->IsInferred)
+      UnadjustedInferredFID = ModMap.getModuleMapFileIDForUniquing(Mod);
+    int InferredFID = getAdjustedFileID(UnadjustedInferredFID).getOpaqueValue();
+
     // Emit the definition of the block.
     {
       RecordData::value_type Record[] = {SUBMODULE_DEFINITION,
@@ -3025,6 +3032,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {
                                          ParentID,
                                          (RecordData::value_type)Mod->Kind,
                                          DefinitionLoc,
+                                         (RecordData::value_type)InferredFID,
                                          Mod->IsFramework,
                                          Mod->IsExplicit,
                                          Mod->IsSystem,
@@ -3070,9 +3078,9 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) {
         Module::HK_PrivateTextual},
       {SUBMODULE_EXCLUDED_HEADER, ExcludedHeaderAbbrev, Module::HK_Excluded}
     };
-    for (auto &HL : HeaderLists) {
+    for (const auto &HL : HeaderLists) {
       RecordData::value_type Record[] = {HL.RecordKind};
-      for (auto &H : Mod->Headers[HL.HeaderKind])
+      for (const auto &H : Mod->getHeaders(HL.HeaderKind))
         Stream.EmitRecordWithBlob(HL.Abbrev, Record, H.NameAsWritten);
     }
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
index 6da3665ab9a4dfc..62aa5ff7f002a97 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
+++ b/clang/lib/StaticAnalyzer/Checkers/CMakeLists.txt
@@ -91,8 +91,6 @@ add_clang_library(clangStaticAnalyzerCheckers
   OSObjectCStyleCast.cpp
   PaddingChecker.cpp
   PointerArithChecker.cpp
-  PointerIterationChecker.cpp
-  PointerSortingChecker.cpp
   PointerSubChecker.cpp
   PthreadLockChecker.cpp
   PutenvStackArrayChecker.cpp
diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp
deleted file mode 100644
index 895b2160b76a7b0..000000000000000
--- a/clang/lib/StaticAnalyzer/Checkers/PointerIterationChecker.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-//== PointerIterationChecker.cpp ------------------------------- -*- C++ -*--=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines PointerIterationChecker which checks for non-determinism
-// caused due to iteration of unordered containers of pointer elements.
-//
-//===----------------------------------------------------------------------===//
-
-#include "clang/ASTMatchers/ASTMatchFinder.h"
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
-#include "clang/StaticAnalyzer/Core/Checker.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
-
-using namespace clang;
-using namespace ento;
-using namespace ast_matchers;
-
-namespace {
-
-// ID of a node at which the diagnostic would be emitted.
-constexpr llvm::StringLiteral WarnAtNode = "iter";
-
-class PointerIterationChecker : public Checker<check::ASTCodeBody> {
-public:
-  void checkASTCodeBody(const Decl *D,
-                        AnalysisManager &AM,
-                        BugReporter &BR) const;
-};
-
-static void emitDiagnostics(const BoundNodes &Match, const Decl *D,
-                            BugReporter &BR, AnalysisManager &AM,
-                            const PointerIterationChecker *Checker) {
-  auto *ADC = AM.getAnalysisDeclContext(D);
-
-  const auto *MarkedStmt = Match.getNodeAs<Stmt>(WarnAtNode);
-  assert(MarkedStmt);
-
-  auto Range = MarkedStmt->getSourceRange();
-  auto Location = PathDiagnosticLocation::createBegin(MarkedStmt,
-                                                      BR.getSourceManager(),
-                                                      ADC);
-  std::string Diagnostics;
-  llvm::raw_string_ostream OS(Diagnostics);
-  OS << "Iteration of pointer-like elements "
-     << "can result in non-deterministic ordering";
-
-  BR.EmitBasicReport(ADC->getDecl(), Checker,
-                     "Iteration of pointer-like elements", "Non-determinism",
-                     Diagnostics, Location, Range);
-}
-
-// Assumption: Iteration of ordered containers of pointers is deterministic.
-
-// TODO: Currently, we only check for std::unordered_set. Other unordered
-// containers like std::unordered_map also need to be handled.
-
-// TODO: Currently, we do not check what the for loop does with the iterated
-// pointer values. Not all iterations may cause non-determinism. For example,
-// counting or summing up the elements should not be non-deterministic.
-
-auto matchUnorderedIterWithPointers() -> decltype(decl()) {
-
-  auto UnorderedContainerM = declRefExpr(to(varDecl(hasType(
-                               recordDecl(hasName("std::unordered_set")
-                             )))));
-
-  auto PointerTypeM = varDecl(hasType(hasCanonicalType(pointerType())));
-
-  auto PointerIterM = stmt(cxxForRangeStmt(
-                             hasLoopVariable(PointerTypeM),
-                             hasRangeInit(UnorderedContainerM)
-                      )).bind(WarnAtNode);
-
-  return decl(forEachDescendant(PointerIterM));
-}
-
-void PointerIterationChecker::checkASTCodeBody(const Decl *D,
-                                             AnalysisManager &AM,
-                                             BugReporter &BR) const {
-  auto MatcherM = matchUnorderedIterWithPointers();
-
-  auto Matches = match(MatcherM, *D, AM.getASTContext());
-  for (const auto &Match : Matches)
-    emitDiagnostics(Match, D, BR, AM, this);
-}
-
-} // end of anonymous namespace
-
-void ento::registerPointerIterationChecker(CheckerManager &Mgr) {
-  Mgr.registerChecker<PointerIterationChecker>();
-}
-
-bool ento::shouldRegisterPointerIterationChecker(const CheckerManager &mgr) {
-  const LangOptions &LO = mgr.getLangOpts();
-  return LO.CPlusPlus;
-}
diff --git a/clang/lib/StaticAnalyzer/Checkers/PointerSortingChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PointerSortingChecker.cpp
deleted file mode 100644
index 25d87f4acfc910c..000000000000000
--- a/clang/lib/StaticAnalyzer/Checkers/PointerSortingChecker.cpp
+++ /dev/null
@@ -1,115 +0,0 @@
-//== PointerSortingChecker.cpp --------------------------------- -*- C++ -*--=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines PointerSortingChecker which checks for non-determinism
-// caused due to sorting containers with pointer-like elements.
-//
-//===----------------------------------------------------------------------===//
-
-#include "clang/ASTMatchers/ASTMatchFinder.h"
-#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
-#include "clang/StaticAnalyzer/Core/Checker.h"
-#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
-
-using namespace clang;
-using namespace ento;
-using namespace ast_matchers;
-
-namespace {
-
-// ID of a node at which the diagnostic would be emitted.
-constexpr llvm::StringLiteral WarnAtNode = "sort";
-
-class PointerSortingChecker : public Checker<check::ASTCodeBody> {
-public:
-  void checkASTCodeBody(const Decl *D,
-                        AnalysisManager &AM,
-                        BugReporter &BR) const;
-};
-
-static void emitDiagnostics(const BoundNodes &Match, const Decl *D,
-                            BugReporter &BR, AnalysisManager &AM,
-                            const PointerSortingChecker *Checker) {
-  auto *ADC = AM.getAnalysisDeclContext(D);
-
-  const auto *MarkedStmt = Match.getNodeAs<CallExpr>(WarnAtNode);
-  assert(MarkedStmt);
-
-  auto Range = MarkedStmt->getSourceRange();
-  auto Location = PathDiagnosticLocation::createBegin(MarkedStmt,
-                                                      BR.getSourceManager(),
-                                                      ADC);
-  std::string Diagnostics;
-  llvm::raw_string_ostream OS(Diagnostics);
-  OS << "Sorting pointer-like elements "
-     << "can result in non-deterministic ordering";
-
-  BR.EmitBasicReport(ADC->getDecl(), Checker,
-                     "Sorting of pointer-like elements", "Non-determinism",
-                     OS.str(), Location, Range);
-}
-
-decltype(auto) callsName(const char *FunctionName) {
-  return callee(functionDecl(hasName(FunctionName)));
-}
-
-// FIXME: Currently we simply check if std::sort is used with pointer-like
-// elements. This approach can have a big false positive rate. Using std::sort,
-// std::unique and then erase is common technique for deduplicating a container
-// (which in some cases might even be quicker than using, let's say std::set).
-// In case a container contains arbitrary memory addresses (e.g. multiple
-// things give different stuff but might give the same thing multiple times)
-// which we don't want to do things with more than once, we might use
-// sort-unique-erase and the sort call will emit a report.
-auto matchSortWithPointers() -> decltype(decl()) {
-  // Match any of these function calls.
-  auto SortFuncM = anyOf(
-                     callsName("std::is_sorted"),
-                     callsName("std::nth_element"),
-                     callsName("std::partial_sort"),
-                     callsName("std::partition"),
-                     callsName("std::sort"),
-                     callsName("std::stable_partition"),
-                     callsName("std::stable_sort")
-                    );
-
-  // Match only if the container has pointer-type elements.
-  auto IteratesPointerEltsM = hasArgument(0,
-                                hasType(cxxRecordDecl(has(
-                                  fieldDecl(hasType(hasCanonicalType(
-                                    pointsTo(hasCanonicalType(pointerType()))
-                                  )))
-                              ))));
-
-  auto PointerSortM = traverse(
-      TK_AsIs,
-      stmt(callExpr(allOf(SortFuncM, IteratesPointerEltsM))).bind(WarnAtNode));
-
-  return decl(forEachDescendant(PointerSortM));
-}
-
-void PointerSortingChecker::checkASTCodeBody(const Decl *D,
-                                             AnalysisManager &AM,
-                                             BugReporter &BR) const {
-  auto MatcherM = matchSortWithPointers();
-
-  auto Matches = match(MatcherM, *D, AM.getASTContext());
-  for (const auto &Match : Matches)
-    emitDiagnostics(Match, D, BR, AM, this);
-}
-
-} // end of anonymous namespace
-
-void ento::registerPointerSortingChecker(CheckerManager &Mgr) {
-  Mgr.registerChecker<PointerSortingChecker>();
-}
-
-bool ento::shouldRegisterPointerSortingChecker(const CheckerManager &mgr) {
-  const LangOptions &LO = mgr.getLangOpts();
-  return LO.CPlusPlus;
-}
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
index b7b2f8a16f07b31..9d34dfd3cea636b 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp
@@ -17,6 +17,10 @@
 
 namespace clang {
 
+bool isSafePtr(clang::CXXRecordDecl *Decl) {
+  return isRefCounted(Decl) || isCheckedPtr(Decl);
+}
+
 bool tryToFindPtrOrigin(
     const Expr *E, bool StopAtFirstRefCountedObj,
     std::function<bool(const clang::Expr *, bool)> callback) {
@@ -31,7 +35,7 @@ bool tryToFindPtrOrigin(
     }
     if (auto *tempExpr = dyn_cast<CXXTemporaryObjectExpr>(E)) {
       if (auto *C = tempExpr->getConstructor()) {
-        if (auto *Class = C->getParent(); Class && isRefCounted(Class))
+        if (auto *Class = C->getParent(); Class && isSafePtr(Class))
           return callback(E, true);
         break;
       }
@@ -56,7 +60,7 @@ bool tryToFindPtrOrigin(
       if (StopAtFirstRefCountedObj) {
         if (auto *ConversionFunc =
                 dyn_cast_or_null<FunctionDecl>(cast->getConversionFunction())) {
-          if (isCtorOfRefCounted(ConversionFunc))
+          if (isCtorOfSafePtr(ConversionFunc))
             return callback(E, true);
         }
       }
@@ -68,7 +72,7 @@ bool tryToFindPtrOrigin(
     if (auto *call = dyn_cast<CallExpr>(E)) {
       if (auto *memberCall = dyn_cast<CXXMemberCallExpr>(call)) {
         if (auto *decl = memberCall->getMethodDecl()) {
-          std::optional<bool> IsGetterOfRefCt = isGetterOfRefCounted(decl);
+          std::optional<bool> IsGetterOfRefCt = isGetterOfSafePtr(decl);
           if (IsGetterOfRefCt && *IsGetterOfRefCt) {
             E = memberCall->getImplicitObjectArgument();
             if (StopAtFirstRefCountedObj) {
@@ -87,7 +91,7 @@ bool tryToFindPtrOrigin(
       }
 
       if (auto *callee = call->getDirectCallee()) {
-        if (isCtorOfRefCounted(callee)) {
+        if (isCtorOfRefCounted(callee) || isCtorOfCheckedPtr(callee)) {
           if (StopAtFirstRefCountedObj)
             return callback(E, true);
 
@@ -95,7 +99,7 @@ bool tryToFindPtrOrigin(
           continue;
         }
 
-        if (isRefType(callee->getReturnType()))
+        if (isSafePtrType(callee->getReturnType()))
           return callback(E, true);
 
         if (isSingleton(callee))
@@ -114,7 +118,7 @@ bool tryToFindPtrOrigin(
     }
     if (auto *ObjCMsgExpr = dyn_cast<ObjCMessageExpr>(E)) {
       if (auto *Method = ObjCMsgExpr->getMethodDecl()) {
-        if (isRefType(Method->getReturnType()))
+        if (isSafePtrType(Method->getReturnType()))
           return callback(E, true);
       }
     }
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
index 71440e6d08a1c9a..2293dcf1d4bd643 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp
@@ -135,7 +135,16 @@ bool isCtorOfRefCounted(const clang::FunctionDecl *F) {
          || FunctionName == "Identifier";
 }
 
-bool isRefType(const clang::QualType T) {
+bool isCtorOfCheckedPtr(const clang::FunctionDecl *F) {
+  assert(F);
+  return isCheckedPtr(safeGetName(F));
+}
+
+bool isCtorOfSafePtr(const clang::FunctionDecl *F) {
+  return isCtorOfRefCounted(F) || isCtorOfCheckedPtr(F);
+}
+
+bool isSafePtrType(const clang::QualType T) {
   QualType type = T;
   while (!type.isNull()) {
     if (auto *elaboratedT = type->getAs<ElaboratedType>()) {
@@ -145,7 +154,7 @@ bool isRefType(const clang::QualType T) {
     if (auto *specialT = type->getAs<TemplateSpecializationType>()) {
       if (auto *decl = specialT->getTemplateName().getAsTemplateDecl()) {
         auto name = decl->getNameAsString();
-        return isRefType(name);
+        return isRefType(name) || isCheckedPtr(name);
       }
       return false;
     }
@@ -177,6 +186,12 @@ std::optional<bool> isUncounted(const CXXRecordDecl* Class)
   return (*IsRefCountable);
 }
 
+std::optional<bool> isUnchecked(const CXXRecordDecl *Class) {
+  if (isCheckedPtr(Class))
+    return false; // Cheaper than below
+  return isCheckedPtrCapable(Class);
+}
+
 std::optional<bool> isUncountedPtr(const QualType T) {
   if (T->isPointerType() || T->isReferenceType()) {
     if (auto *CXXRD = T->getPointeeCXXRecordDecl())
@@ -185,8 +200,16 @@ std::optional<bool> isUncountedPtr(const QualType T) {
   return false;
 }
 
-std::optional<bool> isGetterOfRefCounted(const CXXMethodDecl* M)
-{
+/// True if \p T is a pointer/reference to an uncounted or unchecked class.
+std::optional<bool> isUnsafePtr(const QualType T) {
+  if (T->isPointerType() || T->isReferenceType())
+    if (auto *CXXRD = T->getPointeeCXXRecordDecl()) // Unwrap before `||`:
+      return isUncounted(CXXRD).value_or(false) ||  // optional's bool test
+             isUnchecked(CXXRD).value_or(false);    // is has_value().
+  return false;
+}
+
+std::optional<bool> isGetterOfSafePtr(const CXXMethodDecl *M) {
   assert(M);
 
   if (isa<CXXMethodDecl>(M)) {
@@ -194,6 +217,9 @@ std::optional<bool> isGetterOfRefCounted(const CXXMethodDecl* M)
     auto className = safeGetName(calleeMethodsClass);
     auto method = safeGetName(M);
 
+    if (isCheckedPtr(className) && (method == "get" || method == "ptr"))
+      return true;
+
     if ((isRefType(className) && (method == "get" || method == "ptr")) ||
         ((className == "String" || className == "AtomString" ||
           className == "AtomStringImpl" || className == "UniqueString" ||
@@ -205,7 +231,12 @@ std::optional<bool> isGetterOfRefCounted(const CXXMethodDecl* M)
     // FIXME: Currently allowing any Ref<T> -> whatever cast.
     if (isRefType(className)) {
       if (auto *maybeRefToRawOperator = dyn_cast<CXXConversionDecl>(M))
-        return isUncountedPtr(maybeRefToRawOperator->getConversionType());
+        return isUnsafePtr(maybeRefToRawOperator->getConversionType());
+    }
+
+    if (isCheckedPtr(className)) {
+      if (auto *maybeRefToRawOperator = dyn_cast<CXXConversionDecl>(M))
+        return isUnsafePtr(maybeRefToRawOperator->getConversionType());
     }
   }
   return false;
@@ -448,7 +479,7 @@ class TrivialFunctionAnalysisVisitor
     if (!Callee)
       return false;
 
-    std::optional<bool> IsGetterOfRefCounted = isGetterOfRefCounted(Callee);
+    std::optional<bool> IsGetterOfRefCounted = isGetterOfSafePtr(Callee);
     if (IsGetterOfRefCounted && *IsGetterOfRefCounted)
       return true;
 
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
index 8e6aadf63b6d679..4b41ca96e1df1d3 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h
@@ -63,18 +63,30 @@ std::optional<bool> isUncounted(const clang::CXXRecordDecl* Class);
 /// class, false if not, std::nullopt if inconclusive.
 std::optional<bool> isUncountedPtr(const clang::QualType T);
 
-/// \returns true if Name is a RefPtr, Ref, or its variant, false if not.
-bool isRefType(const std::string &Name);
+/// \returns true if \p T is a RefPtr, Ref, CheckedPtr, CheckedRef, or its
+/// variant, false if not.
+bool isSafePtrType(const clang::QualType T);
 
 /// \returns true if \p F creates ref-countable object from uncounted parameter,
 /// false if not.
 bool isCtorOfRefCounted(const clang::FunctionDecl *F);
 
-/// \returns true if \p T is RefPtr, Ref, or its variant, false if not.
-bool isRefType(const clang::QualType T);
+/// \returns true if \p F creates checked ptr object from uncounted parameter,
+/// false if not.
+bool isCtorOfCheckedPtr(const clang::FunctionDecl *F);
+
+/// \returns true if \p F creates ref-countable or checked ptr object from
+/// uncounted parameter, false if not.
+bool isCtorOfSafePtr(const clang::FunctionDecl *F);
+
+/// \returns true if \p Name is RefPtr, Ref, or its variant, false if not.
+bool isRefType(const std::string &Name);
+
+/// \returns true if \p Name is CheckedRef or CheckedPtr, false if not.
+bool isCheckedPtr(const std::string &Name);
 
 /// \returns true if \p M is getter of a ref-counted class, false if not.
-std::optional<bool> isGetterOfRefCounted(const clang::CXXMethodDecl* Method);
+std::optional<bool> isGetterOfSafePtr(const clang::CXXMethodDecl *Method);
 
 /// \returns true if \p F is a conversion between ref-countable or ref-counted
 /// pointer types.
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
index cea3503fa2c314d..1a5a7309a54f167 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedCallArgsChecker.cpp
@@ -96,6 +96,8 @@ class UncountedCallArgsChecker
           auto name = safeGetName(MD);
           if (name == "ref" || name == "deref")
             return;
+          if (name == "incrementPtrCount" || name == "decrementPtrCount")
+            return;
         }
         auto *E = MemberCallExpr->getImplicitObjectArgument();
         QualType ArgType = MemberCallExpr->getObjectType().getCanonicalType();
diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp
index 81d21100de878db..76a4599cc8d7883 100644
--- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp
@@ -48,6 +48,65 @@ bool isRefcountedStringsHack(const VarDecl *V) {
   return false;
 }
 
+/// Searches a guarded scope for statements that reassign, move from, swap or
+/// release the guardian variable.
+///
+/// Each Visit* callback returns false when it finds such a statement, which
+/// stops the traversal and makes TraverseCompoundStmt return false.
+struct GuardianVisitor : public RecursiveASTVisitor<GuardianVisitor> {
+  /// The variable whose modifications are searched for; asserted non-null.
+  const VarDecl *Guardian{nullptr};
+
+  /// \param Guardian the variable to look for; must not be null.
+  explicit GuardianVisitor(const VarDecl *Guardian) : Guardian(Guardian) {
+    assert(Guardian);
+  }
+
+  /// \returns true if \p E, ignoring parentheses and casts, is a direct
+  /// reference to the guardian variable.
+  bool refersToGuardian(const Expr *E) const {
+    const auto *VarRef = dyn_cast<DeclRefExpr>(E->IgnoreParenCasts());
+    return VarRef && VarRef->getDecl() == Guardian;
+  }
+
+  /// Stops on a built-in assignment whose left-hand side is the guardian.
+  /// Parentheses and casts are stripped here as in the other callbacks, so
+  /// that `(guardian) = x` is caught too.
+  bool VisitBinaryOperator(const BinaryOperator *BO) {
+    return !(BO->isAssignmentOp() && refersToGuardian(BO->getLHS()));
+  }
+
+  /// Stops on a single-argument move construction whose source is the
+  /// guardian.
+  bool VisitCXXConstructExpr(const CXXConstructExpr *CE) {
+    const auto *Ctor = CE->getConstructor();
+    // Only move constructors called with exactly one argument are considered.
+    if (!Ctor || !Ctor->isMoveConstructor() || CE->getNumArgs() != 1)
+      return true;
+    return !refersToGuardian(CE->getArg(0));
+  }
+
+  /// Stops when swap, leakRef, or releaseNonNull is called on the guardian.
+  bool VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) {
+    auto MethodName = safeGetName(MCE->getMethodDecl());
+    // Calls to methods with any other name are ignored.
+    if (MethodName != "swap" && MethodName != "leakRef" &&
+        MethodName != "releaseNonNull")
+      return true;
+    return !refersToGuardian(MCE->getImplicitObjectArgument());
+  }
+
+  /// Stops on an overloaded assignment operator whose first operand is the
+  /// guardian.
+  bool VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) {
+    if (!OCE->isAssignmentOp())
+      return true;
+    // Argument 0 is the object being assigned to.
+    assert(OCE->getNumArgs() == 2);
+    return !refersToGuardian(OCE->getArg(0));
+  }
+};
+
 bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded,
                                            const VarDecl *MaybeGuardian) {
   assert(Guarded);
@@ -81,7 +140,7 @@ bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded,
 
   // We need to skip the first CompoundStmt to avoid situation when guardian is
   // defined in the same scope as guarded variable.
-  bool HaveSkippedFirstCompoundStmt = false;
+  const CompoundStmt *FirstCompoundStmt = nullptr;
   for (DynTypedNodeList guardedVarAncestors = ctx.getParents(*Guarded);
        !guardedVarAncestors.empty();
        guardedVarAncestors = ctx.getParents(
@@ -90,12 +149,15 @@ bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded,
   ) {
     for (auto &guardedVarAncestor : guardedVarAncestors) {
       if (auto *CStmtAncestor = guardedVarAncestor.get<CompoundStmt>()) {
-        if (!HaveSkippedFirstCompoundStmt) {
-          HaveSkippedFirstCompoundStmt = true;
+        if (!FirstCompoundStmt) {
+          FirstCompoundStmt = CStmtAncestor;
           continue;
         }
-        if (CStmtAncestor == guardiansClosestCompStmtAncestor)
-          return true;
+        if (CStmtAncestor == guardiansClosestCompStmtAncestor) {
+          GuardianVisitor guardianVisitor(MaybeGuardian);
+          auto *GuardedScope = const_cast<CompoundStmt *>(FirstCompoundStmt);
+          return guardianVisitor.TraverseCompoundStmt(GuardedScope);
+        }
       }
     }
   }
@@ -227,6 +289,7 @@ class UncountedLocalVarsChecker
                       if (MaybeGuardianArgCXXRecord) {
                         if (MaybeGuardian->isLocalVarDecl() &&
                             (isRefCounted(MaybeGuardianArgCXXRecord) ||
+                             isCheckedPtr(MaybeGuardianArgCXXRecord) ||
                              isRefcountedStringsHack(MaybeGuardian)) &&
                             isGuardedScopeEmbeddedInGuardianScope(
                                 V, MaybeGuardian))
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
index c50db1e0e2f863e..ccc3097e8d2f971 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp
@@ -513,70 +513,25 @@ ProgramStateRef ExprEngine::updateObjectsUnderConstruction(
 static ProgramStateRef
 bindRequiredArrayElementToEnvironment(ProgramStateRef State,
                                       const ArrayInitLoopExpr *AILE,
-                                      const LocationContext *LCtx, SVal Idx) {
-  // The ctor in this case is guaranteed to be a copy ctor, otherwise we hit a
-  // compile time error.
-  //
-  //  -ArrayInitLoopExpr                <-- we're here
-  //   |-OpaqueValueExpr
-  //   | `-DeclRefExpr                  <-- match this
-  //   `-CXXConstructExpr
-  //     `-ImplicitCastExpr
-  //       `-ArraySubscriptExpr
-  //         |-ImplicitCastExpr
-  //         | `-OpaqueValueExpr
-  //         |   `-DeclRefExpr
-  //         `-ArrayInitIndexExpr
-  //
-  // The resulting expression might look like the one below in an implicit
-  // copy/move ctor.
-  //
-  //   ArrayInitLoopExpr                <-- we're here
-  //   |-OpaqueValueExpr
-  //   | `-MemberExpr                   <-- match this
-  //   |  (`-CXXStaticCastExpr)         <-- move ctor only
-  //   |     `-DeclRefExpr
-  //   `-CXXConstructExpr
-  //     `-ArraySubscriptExpr
-  //       |-ImplicitCastExpr
-  //       | `-OpaqueValueExpr
-  //       |   `-MemberExpr
-  //       |     `-DeclRefExpr
-  //       `-ArrayInitIndexExpr
-  //
-  // The resulting expression for a multidimensional array.
-  // ArrayInitLoopExpr                  <-- we're here
-  // |-OpaqueValueExpr
-  // | `-DeclRefExpr                    <-- match this
-  // `-ArrayInitLoopExpr
-  //   |-OpaqueValueExpr
-  //   | `-ArraySubscriptExpr
-  //   |   |-ImplicitCastExpr
-  //   |   | `-OpaqueValueExpr
-  //   |   |   `-DeclRefExpr
-  //   |   `-ArrayInitIndexExpr
-  //   `-CXXConstructExpr             <-- extract this
-  //     ` ...
-
-  const auto *OVESrc = AILE->getCommonExpr()->getSourceExpr();
+                                      const LocationContext *LCtx, NonLoc Idx) {
+  SValBuilder &SVB = State->getStateManager().getSValBuilder();
+  MemRegionManager &MRMgr = SVB.getRegionManager();
+  ASTContext &Ctx = SVB.getContext();
 
   // HACK: There is no way we can put the index of the array element into the
   // CFG unless we unroll the loop, so we manually select and bind the required
   // parameter to the environment.
-  const auto *CE =
+  const Expr *SourceArray = AILE->getCommonExpr()->getSourceExpr();
+  const auto *Ctor =
       cast<CXXConstructExpr>(extractElementInitializerFromNestedAILE(AILE));
 
-  SVal Base = UnknownVal();
-  if (const auto *ME = dyn_cast<MemberExpr>(OVESrc))
-    Base = State->getSVal(ME, LCtx);
-  else if (const auto *DRE = dyn_cast<DeclRefExpr>(OVESrc))
-    Base = State->getLValue(cast<VarDecl>(DRE->getDecl()), LCtx);
-  else
-    llvm_unreachable("ArrayInitLoopExpr contains unexpected source expression");
-
-  SVal NthElem = State->getLValue(CE->getType(), Idx, Base);
+  const auto *SourceArrayRegion =
+      cast<SubRegion>(State->getSVal(SourceArray, LCtx).getAsRegion());
+  const ElementRegion *ElementRegion =
+      MRMgr.getElementRegion(Ctor->getType(), Idx, SourceArrayRegion, Ctx);
 
-  return State->BindExpr(CE->getArg(0), LCtx, NthElem);
+  return State->BindExpr(Ctor->getArg(0), LCtx,
+                         loc::MemRegionVal(ElementRegion));
 }
 
 void ExprEngine::handleConstructor(const Expr *E,
diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
index 0a29a050bbc2bab..02d1358a2001ef4 100644
--- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
+++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
@@ -722,6 +722,13 @@ std::string MemRegion::getDescriptiveName(bool UseQuotes) const {
   SmallString<50> buf;
   llvm::raw_svector_ostream os(buf);
 
+  // Enclose subject with single quotes if needed.
+  auto QuoteIfNeeded = [UseQuotes](const Twine &Subject) -> std::string {
+    if (UseQuotes)
+      return ("'" + Subject + "'").str();
+    return Subject.str();
+  };
+
   // Obtain array indices to add them to the variable name.
   const ElementRegion *ER = nullptr;
   while ((ER = R->getAs<ElementRegion>())) {
@@ -751,12 +758,20 @@ std::string MemRegion::getDescriptiveName(bool UseQuotes) const {
   }
 
   // Get variable name.
-  if (R && R->canPrintPrettyAsExpr()) {
-    R->printPrettyAsExpr(os);
-    if (UseQuotes)
-      return (llvm::Twine("'") + os.str() + ArrayIndices + "'").str();
-    else
-      return (llvm::Twine(os.str()) + ArrayIndices).str();
+  if (R) {
+    // MemRegion can be pretty printed.
+    if (R->canPrintPrettyAsExpr()) {
+      R->printPrettyAsExpr(os);
+      return QuoteIfNeeded(llvm::Twine(os.str()) + ArrayIndices);
+    }
+
+    // FieldRegion may have ElementRegion as SuperRegion.
+    if (const auto *FR = R->getAs<FieldRegion>()) {
+      std::string Super = FR->getSuperRegion()->getDescriptiveName(false);
+      if (Super.empty())
+        return "";
+      return QuoteIfNeeded(Super + "." + FR->getDecl()->getName());
+    }
   }
 
   return VariableName;
diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
index 77f9d07175c2c15..637416cd1fc621f 100644
--- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
+++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp
@@ -587,9 +587,7 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
   ModuleMap &ModMapInfo =
       MDC.ScanInstance.getPreprocessor().getHeaderSearchInfo().getModuleMap();
 
-  OptionalFileEntryRef ModuleMap = ModMapInfo.getModuleMapFileForUniquing(M);
-
-  if (ModuleMap) {
+  if (auto ModuleMap = ModMapInfo.getModuleMapFileForUniquing(M)) {
     SmallString<128> Path = ModuleMap->getNameAsRequested();
     ModMapInfo.canonicalizeModuleMapPath(Path);
     MD.ClangModuleMapFile = std::string(Path);
@@ -601,15 +599,13 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
   MDC.ScanInstance.getASTReader()->visitInputFileInfos(
       *MF, /*IncludeSystem=*/true,
       [&](const serialization::InputFileInfo &IFI, bool IsSystem) {
-        // __inferred_module.map is the result of the way in which an implicit
-        // module build handles inferred modules. It adds an overlay VFS with
-        // this file in the proper directory and relies on the rest of Clang to
-        // handle it like normal. With explicitly built modules we don't need
-        // to play VFS tricks, so replace it with the correct module map.
-        if (StringRef(IFI.Filename).ends_with("__inferred_module.map")) {
-          MDC.addFileDep(MD, ModuleMap->getName());
+        // The __inferred_module.map file is an insignificant implementation
+        // detail of implicitly-built modules. The PCM will also report the
+        // actual on-disk module map file that allowed inferring the module,
+        // which is what we need for building the module explicitly.
+        // Let's ignore this file.
+        if (StringRef(IFI.Filename).ends_with("__inferred_module.map"))
           return;
-        }
         MDC.addFileDep(MD, IFI.Filename);
       });
 
diff --git a/clang/test/AST/ByteCode/placement-new.cpp b/clang/test/AST/ByteCode/placement-new.cpp
index 5673b5cba3f700b..56f54ff168f3e85 100644
--- a/clang/test/AST/ByteCode/placement-new.cpp
+++ b/clang/test/AST/ByteCode/placement-new.cpp
@@ -14,7 +14,9 @@ namespace std {
   template<typename T, typename ...Args>
   constexpr void construct_at(void *p, Args &&...args) {
     new (p) T((Args&&)args...); // both-note {{in call to}} \
-                                // both-note {{placement new would change type of storage from 'int' to 'float'}}
+                                // both-note {{placement new would change type of storage from 'int' to 'float'}} \
+                                // both-note {{construction of subobject of member 'x' of union with active member 'a' is not allowed in a constant expression}}
+
   }
 }
 
@@ -284,6 +286,18 @@ namespace ConstructAt {
   static_assert(bad_construct_at_type()); // both-error {{not an integral constant expression}} \
                                           // both-note {{in call}}
 
+  constexpr bool bad_construct_at_subobject() {
+    struct X { int a, b; };
+    union A {
+      int a;
+      X x;
+    };
+    A a = {1};
+    std::construct_at<int>(&a.x.a, 1); // both-note {{in call}}
+    return true;
+  }
+  static_assert(bad_construct_at_subobject()); // both-error{{not an integral constant expression}} \
+                                               // both-note {{in call}}
 }
 
 namespace UsedToCrash {
diff --git a/clang/test/AST/arm-mfp8.cpp b/clang/test/AST/arm-mfp8.cpp
new file mode 100644
index 000000000000000..51bebba067eb9f6
--- /dev/null
+++ b/clang/test/AST/arm-mfp8.cpp
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -std=c++11 -triple aarch64-arm-none-eabi -target-feature -fp8 -ast-dump %s | \
+// RUN:  FileCheck %s --strict-whitespace
+
+// REQUIRES: aarch64-registered-target || arm-registered-target
+
+/*  Various contexts where type __mfp8 can appear. */
+
+#include<arm_neon.h>
+/*  Namespace */
+namespace {
+  __mfp8 f2n;
+  __mfp8 arr1n[10];
+}
+
+//CHECK:       |-NamespaceDecl {{.*}}
+//CHECK-NEXT:  | |-VarDecl {{.*}} f2n '__mfp8':'__MFloat8_t'
+//CHECK-NEXT:  | `-VarDecl {{.*}} arr1n '__mfp8[10]'
+
+
+  const __mfp8 func1n(const __mfp8 mfp8) {
+    // this should fail
+    __mfp8 f1n;
+    f1n  = mfp8;
+    return f1n;
+  }
+//CHECK:    |-FunctionDecl {{.*}} func1n 'const __mfp8 (const __mfp8)'
+//CHECK:            | `-VarDecl {{.*}} f1n '__mfp8':'__MFloat8_t'
+//CHECK-NEXT:       |-BinaryOperator {{.*}} '__mfp8':'__MFloat8_t' lvalue '='
+//CHECK-NEXT:       | |-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue Var {{.*}} 'f1n' '__mfp8':'__MFloat8_t'
+//CHECK-NEXT:       | `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' <LValueToRValue>
+//CHECK-NEXT:       |   `-DeclRefExpr {{.*}} 'const __mfp8':'const __MFloat8_t' lvalue ParmVar {{.*}} 'mfp8' 'const __mfp8':'const __MFloat8_t'
+//CHECK-NEXT:        `-ReturnStmt {{.*}}
+//CHECK-NEXT:         `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' <LValueToRValue>
+//CHECK-NEXT:           `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue Var {{.*}} 'f1n' '__mfp8':'__MFloat8_t'
+
+
+/* Class */
+
+class C1 {
+  __mfp8 f1c;
+  static const __mfp8 f2c;
+  volatile __MFloat8_t f3c;
+public:
+  C1(__mfp8 arg) : f1c(arg), f3c(arg) { }
+  __mfp8 func1c(__mfp8 arg ) {
+    return  arg;
+  }
+  static __mfp8 func2c(__mfp8 arg) {
+    return arg;
+  }
+};
+
+//CHECK:       | |-CXXRecordDecl {{.*}} referenced class C1
+//CHECK-NEXT:  | |-FieldDecl {{.*}} f1c '__mfp8':'__MFloat8_t'
+//CHECK-NEXT:  | |-VarDecl {{.*}} f2c 'const __mfp8':'const __MFloat8_t' static
+//CHECK-NEXT:  | |-FieldDecl {{.*}} f3c 'volatile __MFloat8_t'
+//CHECK-NEXT:  | |-AccessSpecDecl {{.*}}
+//CHECK-NEXT:  | |-CXXConstructorDecl {{.*}} C1 'void (__mfp8)' implicit-inline
+//CHECK-NEXT:  | | |-ParmVarDecl {{.*}} arg '__mfp8':'__MFloat8_t'
+//CHECK-NEXT:  | | |-CXXCtorInitializer {{.*}} 'f1c' '__mfp8':'__MFloat8_t'
+//CHECK-NEXT:  | | | `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' <LValueToRValue>
+//CHECK-NEXT:  | | |   `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue ParmVar {{.*}} 'arg' '__mfp8':'__MFloat8_t'
+//CHECK-NEXT:  | | |-CXXCtorInitializer {{.*}} 'f3c' 'volatile __MFloat8_t'
+//CHECK-NEXT:  | | | `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' <LValueToRValue>
+//CHECK-NEXT:  | | |   `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue ParmVar {{.*}} 'arg' '__mfp8':'__MFloat8_t'
+//CHECK-NEXT:  | | `-CompoundStmt {{.*}}
+//CHECK-NEXT:  | |-CXXMethodDecl {{.*}} func1c '__mfp8 (__mfp8)' implicit-inline
+//CHECK-NEXT:  | | |-ParmVarDecl {{.*}} arg '__mfp8':'__MFloat8_t'
+//CHECK-NEXT:  | | `-CompoundStmt {{.*}}
+//CHECK-NEXT:  | |   `-ReturnStmt {{.*}}
+//CHECK-NEXT:  | |     `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' <LValueToRValue>
+//CHECK-NEXT:  | |       `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue ParmVar {{.*}} 'arg' '__mfp8':'__MFloat8_t'
+//CHECK-NEXT:  | `-CXXMethodDecl {{.*}} func2c '__mfp8 (__mfp8)' static implicit-inline
+//CHECK-NEXT:  |   |-ParmVarDecl {{.*}} arg '__mfp8':'__MFloat8_t'
+//CHECK-NEXT:  |   `-CompoundStmt {{.*}}
+//CHECK-NEXT:  |     `-ReturnStmt {{.*}}
+//CHECK-NEXT:  |       `-ImplicitCastExpr {{.*}} '__mfp8':'__MFloat8_t' <LValueToRValue>
+//CHECK-NEXT:  |         `-DeclRefExpr {{.*}} '__mfp8':'__MFloat8_t' lvalue ParmVar {{.*}} 'arg' '__mfp8':'__MFloat8_t'
+
+template <class C> struct S1 {
+  C mem1;
+};
+
+template <> struct S1<__mfp8> {
+  __mfp8 mem2;
+};
+
+//CHECK:       |-TemplateArgument type '__MFloat8_t'
+//CHECK-NEXT:  | `-BuiltinType {{.*}} '__MFloat8_t'
+//CHECK-NEXT:  |-CXXRecordDecl {{.*}} implicit struct S1
+//CHECK-NEXT:  `-FieldDecl {{.*}} mem2 '__mfp8':'__MFloat8_t'
diff --git a/clang/test/AST/ast-dump-amdgpu-types.c b/clang/test/AST/ast-dump-amdgpu-types.c
index e032d678f1a09e8..f01461cdba2374e 100644
--- a/clang/test/AST/ast-dump-amdgpu-types.c
+++ b/clang/test/AST/ast-dump-amdgpu-types.c
@@ -1,10 +1,15 @@
 // REQUIRES: amdgpu-registered-target
 // Test without serialization:
-// RUN: %clang_cc1 -triple amdgcn -ast-dump -ast-dump-filter __amdgpu_buffer_rsrc_t %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn -ast-dump -ast-dump-filter __amdgpu_buffer_rsrc_t %s | FileCheck %s -check-prefix=BUFFER-RSRC
+// RUN: %clang_cc1 -triple amdgcn -ast-dump -ast-dump-filter __amdgpu_named_workgroup_barrier %s | FileCheck %s -check-prefix=WORKGROUP-BARRIER
 //
 // Test with serialization:
 // RUN: %clang_cc1 -triple amdgcn -emit-pch -o %t %s
-// RUN: %clang_cc1 -x c -triple amdgcn -include-pch %t -ast-dump-all -ast-dump-filter __amdgpu_buffer_rsrc_t /dev/null | sed -e "s/ <undeserialized declarations>//" -e "s/ imported//" | FileCheck %s
+// RUN: %clang_cc1 -x c -triple amdgcn -include-pch %t -ast-dump-all -ast-dump-filter __amdgpu_buffer_rsrc_t /dev/null | sed -e "s/ <undeserialized declarations>//" -e "s/ imported//" | FileCheck %s -check-prefix=BUFFER-RSRC
+// RUN: %clang_cc1 -x c -triple amdgcn -include-pch %t -ast-dump-all -ast-dump-filter __amdgpu_named_workgroup_barrier /dev/null | sed -e "s/ <undeserialized declarations>//" -e "s/ imported//" | FileCheck %s -check-prefix=WORKGROUP-BARRIER
 
-// CHECK: TypedefDecl {{.*}} implicit __amdgpu_buffer_rsrc_t
-// CHECK-NEXT: -BuiltinType {{.*}} '__amdgpu_buffer_rsrc_t'
+// BUFFER-RSRC: TypedefDecl {{.*}} implicit __amdgpu_buffer_rsrc_t
+// BUFFER-RSRC-NEXT: -BuiltinType {{.*}} '__amdgpu_buffer_rsrc_t'
+
+// WORKGROUP-BARRIER: TypedefDecl {{.*}} implicit __amdgpu_named_workgroup_barrier_t
+// WORKGROUP-BARRIER-NEXT: -BuiltinType {{.*}} '__amdgpu_named_workgroup_barrier_t'
diff --git a/clang/test/Analysis/Checkers/WebKit/call-args-checked.cpp b/clang/test/Analysis/Checkers/WebKit/call-args-checked.cpp
new file mode 100644
index 000000000000000..49b6bfcd7cadfdc
--- /dev/null
+++ b/clang/test/Analysis/Checkers/WebKit/call-args-checked.cpp
@@ -0,0 +1,46 @@
+// RUN: %clang_analyze_cc1 -analyzer-checker=alpha.webkit.UncountedCallArgsChecker -verify %s
+
+#include "mock-types.h"
+
+RefCountableAndCheckable* makeObj();
+CheckedRef<RefCountableAndCheckable> makeObjChecked();
+void someFunction(RefCountableAndCheckable*);
+
+namespace call_args_unchecked_uncounted {
+
+static void foo() {
+  someFunction(makeObj());
+  // expected-warning@-1{{Call argument is uncounted and unsafe [alpha.webkit.UncountedCallArgsChecker]}}
+}
+
+} // namespace call_args_unchecked_uncounted
+
+namespace call_args_checked {
+
+static void foo() {
+  CheckedPtr<RefCountableAndCheckable> ptr = makeObj();
+  someFunction(ptr.get());
+}
+
+static void bar() {
+  someFunction(CheckedPtr { makeObj() }.get());
+}
+
+static void baz() {
+  someFunction(makeObjChecked().ptr());
+}
+
+} // namespace call_args_checked
+
+namespace call_args_default {
+
+void someFunction(RefCountableAndCheckable* = makeObj());
+// expected-warning@-1{{Call argument is uncounted and unsafe [alpha.webkit.UncountedCallArgsChecker]}}
+void otherFunction(RefCountableAndCheckable* = makeObjChecked().ptr());
+
+void foo() {
+  someFunction();
+  otherFunction();
+}
+
+}
diff --git a/clang/test/Analysis/Checkers/WebKit/mock-types.h b/clang/test/Analysis/Checkers/WebKit/mock-types.h
index 933b4c5e62a79cc..82c79c97a83de60 100644
--- a/clang/test/Analysis/Checkers/WebKit/mock-types.h
+++ b/clang/test/Analysis/Checkers/WebKit/mock-types.h
@@ -49,7 +49,23 @@ template <typename T, typename PtrTraits = RawPtrTraits<T>, typename RefDerefTra
   Ref() : t{} {};
   Ref(T &t) : t(&RefDerefTraits::ref(t)) { }
   Ref(const Ref& o) : t(RefDerefTraits::refIfNotNull(PtrTraits::unwrap(o.t))) { }
+  Ref(Ref&& o) : t(o.leakRef()) { }
   ~Ref() { RefDerefTraits::derefIfNotNull(PtrTraits::exchange(t, nullptr)); }
+  Ref& operator=(T &t) {
+    Ref o(t);
+    swap(o);
+    return *this;
+  }
+  Ref& operator=(Ref &&o) {
+    Ref m(o);
+    swap(m);
+    return *this;
+  }
+  void swap(Ref& o) {
+    typename PtrTraits::StorageType tmp = t;
+    t = o.t;
+    o.t = tmp;
+  }
   T &get() { return *PtrTraits::unwrap(t); }
   T *ptr() { return PtrTraits::unwrap(t); }
   T *operator->() { return PtrTraits::unwrap(t); }
@@ -74,11 +90,27 @@ template <typename T> struct RefPtr {
     if (t)
       t->deref();
   }
+  Ref<T> releaseNonNull() {
+    Ref<T> tmp(*t);
+    if (t)
+      t->deref();
+    t = nullptr;
+    return tmp;
+  }
+  void swap(RefPtr& o) {
+    T* tmp = t;
+    t = o.t;
+    o.t = tmp;
+  }
   T *get() { return t; }
   T *operator->() { return t; }
   const T *operator->() const { return t; }
   T &operator*() { return *t; }
-  RefPtr &operator=(T *) { return *this; }
+  RefPtr &operator=(T *t) {
+    RefPtr o(t);
+    swap(o);
+    return *this;
+  }
   operator bool() const { return t; }
 };
 
@@ -114,8 +146,8 @@ template <typename T> struct CheckedRef {
 
 public:
   CheckedRef() : t{} {};
-  CheckedRef(T &t) : t(t) { t->incrementPtrCount(); }
-  CheckedRef(const CheckedRef& o) : t(o.t) { if (t) t->incrementPtrCount(); }
+  CheckedRef(T &t) : t(&t) { t.incrementPtrCount(); }
+  CheckedRef(const CheckedRef &o) : t(o.t) { if (t) t->incrementPtrCount(); }
   ~CheckedRef() { if (t) t->decrementPtrCount(); }
   T &get() { return *t; }
   T *ptr() { return t; }
@@ -135,7 +167,7 @@ template <typename T> struct CheckedPtr {
     if (t)
       t->incrementPtrCount();
   }
-  CheckedPtr(Ref<T>&& o)
+  CheckedPtr(Ref<T> &&o)
     : t(o.leakRef())
   { }
   ~CheckedPtr() {
@@ -156,4 +188,14 @@ class CheckedObj {
   void decrementPtrCount();
 };
 
+class RefCountableAndCheckable {
+public:
+  void incrementPtrCount() const;
+  void decrementPtrCount() const;
+  void ref() const;
+  void deref() const;
+  void method();
+  int trivial() { return 0; }
+};
+
 #endif
diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp
index b5f6b8535bf4181..d7fb689557a6fcf 100644
--- a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp
+++ b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp
@@ -83,6 +83,83 @@ void foo7(RefCountable* obj) {
   bar.obj->method();
 }
 
+void foo8(RefCountable* obj) {
+  RefPtr<RefCountable> foo;
+  {
+    RefCountable *bar = foo.get();
+    // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+    foo = nullptr;
+    bar->method();
+  }
+  RefPtr<RefCountable> baz;
+  {
+    RefCountable *bar = baz.get();
+    // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+    baz = obj;
+    bar->method();
+  }
+  foo = nullptr;
+  {
+    RefCountable *bar = foo.get();
+    // No warning. It's okay to mutate RefPtr in an outer scope.
+    bar->method();
+  }
+  foo = obj;
+  {
+    RefCountable *bar = foo.get();
+    // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+    foo.releaseNonNull();
+    bar->method();
+  }
+  {
+    RefCountable *bar = foo.get();
+    // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+    foo = obj ? obj : nullptr;
+    bar->method();
+  }
+  {
+    RefCountable *bar = foo->trivial() ? foo.get() : nullptr;
+    // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+    foo = nullptr;
+    bar->method();
+  }
+}
+
+void foo9(RefCountable& o) {
+  Ref<RefCountable> guardian(o);
+  {
+    RefCountable &bar = guardian.get();
+    // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+    guardian = o; // We don't detect that we're setting it to the same value.
+    bar.method();
+  }
+  {
+    RefCountable *bar = guardian.ptr();
+    // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+    Ref<RefCountable> other(*bar); // We don't detect other has the same value as guardian.
+    guardian.swap(other);
+    bar->method();
+  }
+  {
+    RefCountable *bar = guardian.ptr();
+    // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+    Ref<RefCountable> other(static_cast<Ref<RefCountable>&&>(guardian));
+    bar->method();
+  }
+  {
+    RefCountable *bar = guardian.ptr();
+    // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+    guardian.leakRef();
+    bar->method();
+  }
+  {
+    RefCountable *bar = guardian.ptr();
+    // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+    guardian = o.trivial() ? o : *bar;
+    bar->method();
+  }
+}
+
 } // namespace guardian_scopes
 
 namespace auto_keyword {
@@ -290,6 +367,57 @@ void foo() {
 
 } // namespace local_assignment_to_global
 
+namespace local_refcountable_checkable_object {
+
+RefCountableAndCheckable* provide_obj();
+
+void local_raw_ptr() {
+  RefCountableAndCheckable* a = nullptr;
+  // expected-warning@-1{{Local variable 'a' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+  a = provide_obj();
+  a->method();
+}
+
+void local_checked_ptr() {
+  CheckedPtr<RefCountableAndCheckable> a = nullptr;
+  a = provide_obj();
+  a->method();
+}
+
+void local_var_with_guardian_checked_ptr() {
+  CheckedPtr<RefCountableAndCheckable> a = provide_obj();
+  {
+    auto* b = a.get();
+    b->method();
+  }
+}
+
+void local_var_with_guardian_checked_ptr_with_assignment() {
+  CheckedPtr<RefCountableAndCheckable> a = provide_obj();
+  {
+    RefCountableAndCheckable* b = a.get();
+    // expected-warning@-1{{Local variable 'b' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+    b = provide_obj();
+    b->method();
+  }
+}
+
+void local_var_with_guardian_checked_ref() {
+  CheckedRef<RefCountableAndCheckable> a = *provide_obj();
+  {
+    RefCountableAndCheckable& b = a;
+    b.method();
+  }
+}
+
+void static_var() {
+  static RefCountableAndCheckable* a = nullptr;
+  // expected-warning@-1{{Static local variable 'a' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}}
+  a = provide_obj();
+}
+
+} // namespace local_refcountable_checkable_object
+
 namespace local_var_in_recursive_function {
 
 struct TreeNode {
diff --git a/clang/test/Analysis/array-init-loop.cpp b/clang/test/Analysis/array-init-loop.cpp
index 4ab4489fc882f3f..b28468b7f560b2c 100644
--- a/clang/test/Analysis/array-init-loop.cpp
+++ b/clang/test/Analysis/array-init-loop.cpp
@@ -330,3 +330,41 @@ void no_crash() {
 }
 
 } // namespace crash
+
+namespace array_subscript_initializer {
+struct S {
+  int x;
+};
+
+void no_crash() {
+  S arr[][2] = {{1, 2}};
+
+  const auto [a, b] = arr[0]; // no-crash
+
+  clang_analyzer_eval(a.x == 1); // expected-warning{{TRUE}}
+  clang_analyzer_eval(b.x == 2); // expected-warning{{TRUE}}
+}
+} // namespace array_subscript_initializer
+
+namespace iterator_initializer {
+struct S {
+  int x;
+};
+
+void no_crash() {
+  S arr[][2] = {{1, 2}, {3, 4}};
+
+  int i = 0;
+  for (const auto [a, b] : arr) { // no-crash
+    if (i == 0) {
+      clang_analyzer_eval(a.x == 1); // expected-warning{{TRUE}}
+      clang_analyzer_eval(b.x == 2); // expected-warning{{TRUE}}
+    } else {
+      clang_analyzer_eval(a.x == 3); // expected-warning{{TRUE}}
+      clang_analyzer_eval(b.x == 4); // expected-warning{{TRUE}}
+    }
+
+    ++i;
+  }
+}
+} // namespace iterator_initializer
diff --git a/clang/test/Analysis/ptr-iter.cpp b/clang/test/Analysis/ptr-iter.cpp
deleted file mode 100644
index a94288cd1c8cccb..000000000000000
--- a/clang/test/Analysis/ptr-iter.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-// RUN: %clang_analyze_cc1 %s -std=c++14 -analyzer-output=text -verify \
-// RUN: -analyzer-checker=core,alpha.nondeterminism.PointerIteration
-
-#include "Inputs/system-header-simulator-cxx.h"
-
-template<class T>
-void f(T x);
-
-void PointerIteration() {
-  int a = 1, b = 2;
-  std::set<int> OrderedIntSet = {a, b};
-  std::set<int *> OrderedPtrSet = {&a, &b};
-  std::unordered_set<int> UnorderedIntSet = {a, b};
-  std::unordered_set<int *> UnorderedPtrSet = {&a, &b};
-
-  for (auto i : OrderedIntSet) // no-warning
-    f(i);
-
-  for (auto i : OrderedPtrSet) // no-warning
-    f(i);
-
-  for (auto i : UnorderedIntSet) // no-warning
-    f(i);
-
-  for (auto i : UnorderedPtrSet) // expected-warning {{Iteration of pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerIteration]
-// expected-note@-1 {{Iteration of pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerIteration]
-    f(i);
-}
diff --git a/clang/test/Analysis/ptr-sort.cpp b/clang/test/Analysis/ptr-sort.cpp
deleted file mode 100644
index d238b390bdc2357..000000000000000
--- a/clang/test/Analysis/ptr-sort.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-// RUN: %clang_analyze_cc1 %s -std=c++14 -analyzer-output=text -verify \
-// RUN: -analyzer-checker=core,alpha.nondeterminism.PointerSorting
-
-#include "Inputs/system-header-simulator-cxx.h"
-
-bool f(int x) { return true; }
-bool g(int *x) { return true; }
-
-void PointerSorting() {
-  int a = 1, b = 2;
-  std::vector<int> V1 = {a, b};
-  std::vector<int *> V2 = {&a, &b};
-
-  std::is_sorted(V1.begin(), V1.end());                    // no-warning
-  std::nth_element(V1.begin(), V1.begin() + 1, V1.end());  // no-warning
-  std::partial_sort(V1.begin(), V1.begin() + 1, V1.end()); // no-warning
-  std::sort(V1.begin(), V1.end());                         // no-warning
-  std::stable_sort(V1.begin(), V1.end());                  // no-warning
-  std::partition(V1.begin(), V1.end(), f);                 // no-warning
-  std::stable_partition(V1.begin(), V1.end(), g);          // no-warning
-
-  std::is_sorted(V2.begin(), V2.end()); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  std::nth_element(V2.begin(), V2.begin() + 1, V2.end()); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  std::partial_sort(V2.begin(), V2.begin() + 1, V2.end()); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  std::sort(V2.begin(), V2.end()); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  std::stable_sort(V2.begin(), V2.end()); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  std::partition(V2.begin(), V2.end(), f); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  std::stable_partition(V2.begin(), V2.end(), g); // expected-warning {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-  // expected-note@-1 {{Sorting pointer-like elements can result in non-deterministic ordering}} [alpha.nondeterminism.PointerSorting]
-}
diff --git a/clang/test/CXX/drs/cwg1884.cpp b/clang/test/CXX/drs/cwg1884.cpp
new file mode 100644
index 000000000000000..c4f76baa3933fbe
--- /dev/null
+++ b/clang/test/CXX/drs/cwg1884.cpp
@@ -0,0 +1,643 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: split-file --leading-lines %s %t
+// RUN: %clang_cc1 -std=c++20 -pedantic-errors -fexceptions -fcxx-exceptions %t/cwg1884_A.cppm -triple x86_64-unknown-unknown -emit-module-interface -o %t/cwg1884_A.pcm
+// RUN: %clang_cc1 -std=c++20 -verify=since-cxx20 -pedantic-errors -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown %t/cwg1884.cpp -fmodule-file=cwg1884_A=%t/cwg1884_A.pcm
+// RUN: %clang_cc1 -std=c++23 -pedantic-errors -fexceptions -fcxx-exceptions %t/cwg1884_A.cppm -triple x86_64-unknown-unknown -emit-module-interface -o %t/cwg1884_A.pcm
+// RUN: %clang_cc1 -std=c++23 -verify=since-cxx20 -pedantic-errors -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown %t/cwg1884.cpp -fmodule-file=cwg1884_A=%t/cwg1884_A.pcm
+// RUN: %clang_cc1 -std=c++2c -pedantic-errors -fexceptions -fcxx-exceptions %t/cwg1884_A.cppm -triple x86_64-unknown-unknown -emit-module-interface -o %t/cwg1884_A.pcm
+// RUN: %clang_cc1 -std=c++2c -verify=since-cxx20 -pedantic-errors -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown %t/cwg1884.cpp -fmodule-file=cwg1884_A=%t/cwg1884_A.pcm
+
+// cwg1884: partial
+// Cases b11, e11, g3, g4 are problematic, but we handle the other 101 cases fine.
+
+// _N4993_.[basic.link]/11:
+// For any two declarations of an entity E:
+//   — If one declares E to be a variable or function,
+//     the other shall declare E as one of the same type.
+//   — If one declares E to be an enumerator, the other shall do so.
+//   — If one declares E to be a namespace, the other shall do so.
+//   — If one declares E to be a type,
+//     the other shall declare E to be a type of the same kind (9.2.9.5).
+//   — If one declares E to be a class template,
+//     the other shall do so with the same kind and an equivalent template-head (13.7.7.2).
+//     [Note 5 : The declarations can supply different default template arguments. — end note]
+//   — If one declares E to be a function template or a (partial specialization of a) variable template,
+//     the other shall declare E to be one with an equivalent template-head and type.
+//   — If one declares E to be an alias template,
+//     the other shall declare E to be one with an equivalent template-head and defining-type-id.
+//   — If one declares E to be a concept, the other shall do so.
+// Types are compared after all adjustments of types (during which typedefs (9.2.4) are replaced by their definitions);
+// declarations for an array object can specify array types that differ by the presence or absence of a major array bound (9.3.4.5).
+// No diagnostic is required if neither declaration is reachable from the other.
+
+// The structure of the test is the following. First, module cwg1884_A
+// provides all (significant) kinds of entities, each named 'a' through 'j', and copies of them.
+// Then the .cpp file does MxN kind of testing, where it tests one kind of entity against every other kind.
+
+//--- cwg1884_A.cppm
+export module cwg1884_A;
+
+export {
+int a1;
+int a2;
+int a3;
+int a4;
+int a5;
+int a6;
+int a7;
+int a8;
+int a9;
+int a10;
+int a11;
+void b1();
+void b2();
+void b3();
+void b4();
+void b5();
+void b6();
+void b7();
+void b8();
+void b9();
+void b10();
+void b11();
+enum E {
+  c1,
+  c2, 
+  c3,
+  c4,
+  c5,
+  c6,
+  c7,
+  c8,
+  c9,
+  c10
+};
+namespace d1 {}
+namespace d2 {}
+namespace d3 {}
+namespace d4 {}
+namespace d5 {}
+namespace d6 {}
+namespace d7 {}
+namespace d8 {}
+namespace d9 {}
+namespace d10 {}
+struct e1;
+struct e2;
+struct e3;
+struct e4;
+struct e5;
+struct e6;
+struct e7;
+struct e8;
+struct e9;
+struct e10;
+struct e11;
+struct e12;
+struct e13;
+template <typename>
+class f1;
+template <typename>
+class f2;
+template <typename>
+class f3;
+template <typename>
+class f4;
+template <typename>
+class f5;
+template <typename>
+class f6;
+template <typename>
+class f7;
+template <typename>
+class f8;
+template <typename>
+class f9;
+template <typename>
+class f10;
+template <typename>
+class f11;
+template <typename>
+void g1(int);
+template <typename>
+void g2(int);
+template <typename>
+void g3(int);
+template <typename>
+void g4(int);
+template <typename>
+void g5(int);
+template <typename>
+void g6(int);
+template <typename>
+void g7(int);
+template <typename>
+void g8(int);
+template <typename>
+void g9(int);
+template <typename>
+void g10(int);
+template <typename, typename>
+int h1;
+template <typename, typename>
+int h2;
+template <typename, typename>
+int h3;
+template <typename, typename>
+int h4;
+template <typename, typename>
+int h5;
+template <typename, typename>
+int h6;
+template <typename, typename>
+int h7;
+template <typename, typename>
+int h8;
+template <typename, typename>
+int h9;
+template <typename, typename>
+int h10;
+template <typename>
+using i1 = int;
+template <typename>
+using i2 = int;
+template <typename>
+using i3 = int;
+template <typename>
+using i4 = int;
+template <typename>
+using i5 = int;
+template <typename>
+using i6 = int;
+template <typename>
+using i7 = int;
+template <typename>
+using i8 = int;
+template <typename>
+using i9 = int;
+template <typename>
+using i10 = int;
+template <typename>
+using i11 = int;
+template <typename>
+concept j1 = true;
+template <typename>
+concept j2 = true;
+template <typename>
+concept j3 = true;
+template <typename>
+concept j4 = true;
+template <typename>
+concept j5 = true;
+template <typename>
+concept j6 = true;
+template <typename>
+concept j7 = true;
+template <typename>
+concept j8 = true;
+template <typename>
+concept j9 = true;
+template <typename>
+concept j10 = true;
+template <typename>
+concept j11 = true;
+} // export
+
+
+//--- cwg1884.cpp
+import cwg1884_A;
+
+// FIXME: we don't diagnose several cases that we should. They are marked with the MISSING prefix.
+
+// Part A: matching against `int a;`
+// ---------------------------------
+
+void a1();
+// since-cxx20-error@-1 {{redefinition of 'a1' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:42 {{previous definition is here}}
+enum Ea {
+  a2
+  // since-cxx20-error@-1 {{redefinition of 'a2'}}
+  //   since-cxx20-note@cwg1884_A.cppm:43 {{previous definition is here}}
+};
+namespace a3 {}
+// since-cxx20-error@-1 {{redefinition of 'a3' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:44 {{previous definition is here}}
+struct a4;
+// @-1 OK, types and variables do not correspond
+template <typename>
+class a5;
+// since-cxx20-error@-1 {{redefinition of 'a5' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:46 {{previous definition is here}}
+template <typename>
+void a6(int);
+// since-cxx20-error@-1 {{redefinition of 'a6' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:47 {{previous definition is here}}
+template <typename, typename>
+int a7;
+// since-cxx20-error@-1 {{redefinition of 'a7' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:48 {{previous definition is here}}
+template <typename T>
+int a8<T, int>;
+// since-cxx20-error@-1 {{redefinition of 'a8' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:49 {{previous definition is here}}
+// since-cxx20-error@-3 {{expected ';' after top level declarator}}
+template <typename>
+using a9 = int;
+// since-cxx20-error@-1 {{redefinition of 'a9' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:50 {{previous definition is here}}
+template <typename>
+concept a10 = true;
+// since-cxx20-error@-1 {{redefinition of 'a10' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:51 {{previous definition is here}}
+// For variables, type has to match as well.
+long a11;
+// since-cxx20-error@-1 {{redefinition of 'a11' with a different type: 'long' vs 'int'}}
+//   since-cxx20-note@cwg1884_A.cppm:52 {{previous definition is here}}
+
+
+// Part B: matching against `void b();`
+// ------------------------------------
+
+int b1;
+// since-cxx20-error@-1 {{redefinition of 'b1' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:53 {{previous definition is here}}
+enum Eb {
+  b2
+  // since-cxx20-error@-1 {{redefinition of 'b2'}}
+  //   since-cxx20-note@cwg1884_A.cppm:54 {{previous definition is here}}
+};
+namespace b3 {} // #cwg1884-namespace-b
+// since-cxx20-error@-1 {{redefinition of 'b3' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:55 {{previous definition is here}}
+struct b4;
+// @-1 OK, types and functions do not correspond
+template <typename>
+class b5;
+// since-cxx20-error@-1 {{redefinition of 'b5' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:57 {{previous definition is here}}
+template <typename>
+void b6(int);
+// @-1 OK, a non-corresponding overload
+template <typename, typename>
+int b7;
+// since-cxx20-error@-1 {{redefinition of 'b7' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:59 {{previous definition is here}}
+template <typename T>
+int b8<T, int>;
+// since-cxx20-error@-1 {{no variable template matches partial specialization}}
+template <typename>
+using b9 = int;
+// since-cxx20-error@-1 {{redefinition of 'b9' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:61 {{previous definition is here}}
+template <typename>
+concept b10 = true;
+// since-cxx20-error@-1 {{redefinition of 'b10' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:62 {{previous definition is here}}
+// For functions, type has to match as well.
+// FIXME: we should be loud and clear here about type mismatch, like we do in `a11` case.
+int b11();
+// since-cxx20-error@-1 {{declaration of 'b11' in the global module follows declaration in module cwg1884_A}}
+//   since-cxx20-note@cwg1884_A.cppm:63 {{previous declaration is here}}
+
+
+// Part C: matching against `enum E { c };`
+// ----------------------------------------
+
+int c1;
+// since-cxx20-error@-1 {{redefinition of 'c1' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:65 {{previous definition is here}}
+void c2();
+// since-cxx20-error@-1 {{redefinition of 'c2' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:66 {{previous definition is here}}
+namespace c3 {}
+// since-cxx20-error@-1 {{redefinition of 'c3' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:67 {{previous definition is here}}
+struct c4;
+// @-1 OK, types and enumerators do not correspond
+template <typename>
+class c5;
+// since-cxx20-error@-1 {{redefinition of 'c5' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:69 {{previous definition is here}}
+template <typename>
+void c6(int);
+// since-cxx20-error@-1 {{redefinition of 'c6' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:70 {{previous definition is here}}
+template <typename, typename>
+int c7;
+// since-cxx20-error@-1 {{redefinition of 'c7' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:71 {{previous definition is here}}
+template <typename T>
+int c8<T, int>;
+// since-cxx20-error@-1 {{redefinition of 'c8' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:72 {{previous definition is here}}
+// since-cxx20-error@-3 {{expected ';' after top level declarator}}
+template <typename>
+using c9 = int;
+// since-cxx20-error@-1 {{redefinition of 'c9' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:73 {{previous definition is here}}
+template <typename>
+concept c10 = true;
+// since-cxx20-error@-1 {{redefinition of 'c10' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:74 {{previous definition is here}}
+
+
+// Part D: matching against `namespace d {};`
+// ------------------------------------------
+
+int d1;
+// since-cxx20-error@-1 {{redefinition of 'd1' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:76 {{previous definition is here}}
+void d2();
+// since-cxx20-error@-1 {{redefinition of 'd2' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:77 {{previous definition is here}}
+enum Ed {
+  d3
+  // since-cxx20-error@-1 {{redefinition of 'd3'}}
+  //   since-cxx20-note@cwg1884_A.cppm:78 {{previous definition is here}}
+};
+struct d4;
+// since-cxx20-error@-1 {{redefinition of 'd4' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:79 {{previous definition is here}}
+template <typename>
+class d5;
+// since-cxx20-error@-1 {{redefinition of 'd5' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:80 {{previous definition is here}}
+template <typename>
+void d6(int);
+// since-cxx20-error@-1 {{redefinition of 'd6' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:81 {{previous definition is here}}
+template <typename, typename>
+int d7;
+// since-cxx20-error@-1 {{redefinition of 'd7' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:82 {{previous definition is here}}
+template <typename T>
+int d8<T, int>;
+// since-cxx20-error@-1 {{redefinition of 'd8' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:83 {{previous definition is here}}
+// since-cxx20-error@-3 {{expected ';' after top level declarator}}
+template <typename>
+using d9 = int;
+// since-cxx20-error@-1 {{redefinition of 'd9' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:84 {{previous definition is here}}
+template <typename>
+concept d10 = true;
+// since-cxx20-error@-1 {{redefinition of 'd10' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:85 {{previous definition is here}}
+
+
+// Part E: matching against `struct e;`
+// ------------------------------------
+
+int e1;
+// @-1 OK, types and variables do not correspond
+void e2();
+// @-1 OK, types and functions do not correspond
+enum Ee {
+  e3
+  // @-1 OK, types and enumerators do not correspond
+};
+namespace e4 {}
+// since-cxx20-error@-1 {{redefinition of 'e4' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:89 {{previous definition is here}}
+template <typename>
+class e5;
+// since-cxx20-error@-1 {{redefinition of 'e5' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:90 {{previous definition is here}}
+template <typename>
+void e6(int);
+// @-1 OK, types and function templates do not correspond
+template <typename, typename>
+int e7;
+// since-cxx20-error@-1 {{redefinition of 'e7' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:92 {{previous definition is here}}
+template <typename T>
+int e8<T, int>;
+// since-cxx20-error@-1 {{redefinition of 'e8' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:93 {{previous definition is here}}
+// since-cxx20-error@-3 {{expected ';' after top level declarator}}
+template <typename>
+using e9 = int;
+// since-cxx20-error@-1 {{redefinition of 'e9' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:94 {{previous definition is here}}
+template <typename>
+concept e10 = true;
+// since-cxx20-error@-1 {{redefinition of 'e10' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:95 {{previous definition is here}}
+// FIXME: the following forward declaration is well-formed.
+//        Agreement on 'struct' vs 'class' is not required per [dcl.type.elab]/7.
+class e11;
+// since-cxx20-error@-1 {{declaration of 'e11' in the global module follows declaration in module cwg1884_A}}
+//   since-cxx20-note@cwg1884_A.cppm:96 {{previous declaration is here}}
+union e12;
+// since-cxx20-error@-1 {{use of 'e12' with tag type that does not match previous declaration}}
+//   since-cxx20-note@cwg1884_A.cppm:97 {{previous use is here}}
+// since-cxx20-error@-3 {{declaration of 'e12' in the global module follows declaration in module cwg1884_A}}
+//   since-cxx20-note@cwg1884_A.cppm:97 {{previous declaration is here}}
+enum e13 {};
+// since-cxx20-error@-1 {{use of 'e13' with tag type that does not match previous declaration}}
+//   since-cxx20-note@cwg1884_A.cppm:98 {{previous use is here}}
+
+
+// Part F: matching against `template <typename> class f;`
+// -------------------------------------------------------
+
+int f1;
+// since-cxx20-error@-1 {{redefinition of 'f1' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:100 {{previous definition is here}}
+void f2();
+// since-cxx20-error@-1 {{redefinition of 'f2' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:102 {{previous definition is here}}
+enum Ef {
+  f3
+  // since-cxx20-error@-1 {{redefinition of 'f3'}}
+  //   since-cxx20-note@cwg1884_A.cppm:104 {{previous definition is here}}
+};
+namespace f4 {}
+// since-cxx20-error@-1 {{redefinition of 'f4' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:106 {{previous definition is here}}
+struct f5;
+// since-cxx20-error@-1 {{redefinition of 'f5' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:108 {{previous definition is here}}
+template <typename>
+void f6(int);
+// since-cxx20-error@-1 {{redefinition of 'f6' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:110 {{previous definition is here}}
+template <typename, typename>
+int f7;
+// since-cxx20-error@-1 {{redefinition of 'f7' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:112 {{previous definition is here}}
+template <typename T>
+int f8<T, int>;
+// since-cxx20-error@-1 {{no variable template matches partial specialization}}
+template <typename>
+using f9 = int;
+// since-cxx20-error@-1 {{redefinition of 'f9' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:116 {{previous definition is here}}
+template <typename>
+concept f10 = true;
+// since-cxx20-error@-1 {{redefinition of 'f10' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:118 {{previous definition is here}}
+
+
+// Part G: matching against `template <typename> void g(int);`
+// -----------------------------------------------------------
+
+int g1;
+// since-cxx20-error@-1 {{redefinition of 'g1' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:122 {{previous definition is here}}
+void g2();
+// @-1 OK, a non-corresponding overload
+enum Eg {
+  g3
+  // MISSING-since-cxx20-error@-1 {{redefinition of 'g3'}}
+  //   MISSING-since-cxx20-note@cwg1884_A.cppm:126 {{previous definition is here}}
+};
+namespace g4 {}
+// MISSING-since-cxx20-error@-1 {{redefinition of 'g4' as different kind of symbol}}
+//   MISSING-since-cxx20-note@cwg1884_A.cppm:128 {{previous definition is here}}
+struct g5;
+// @-1 OK, types and function templates do not correspond
+template <typename>
+class g6;
+// since-cxx20-error@-1 {{redefinition of 'g6' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:132 {{previous definition is here}}
+template <typename, typename>
+int g7;
+// since-cxx20-error@-1 {{redefinition of 'g7' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:134 {{previous definition is here}}
+template <typename T>
+int g8<T, int>;
+// since-cxx20-error@-1 {{no variable template matches specialization; did you mean to use 'g8' as function template instead?}}
+template <typename>
+using g9 = int;
+// since-cxx20-error@-1 {{redefinition of 'g9' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:138 {{previous definition is here}}
+template <typename>
+concept g10 = true;
+// since-cxx20-error@-1 {{redefinition of 'g10' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:140 {{previous definition is here}}
+
+
+// Part H: matching against `template <typename, typename> int h;`
+// ---------------------------------------------------------------
+
+int h1;
+// since-cxx20-error@-1 {{redefinition of 'h1' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:142 {{previous definition is here}}
+void h2();
+// since-cxx20-error@-1 {{redefinition of 'h2' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:144 {{previous definition is here}}
+enum Eh {
+  h3
+  // since-cxx20-error@-1 {{redefinition of 'h3'}}
+  //   since-cxx20-note@cwg1884_A.cppm:146 {{previous definition is here}}
+};
+namespace h4 {}
+// since-cxx20-error@-1 {{redefinition of 'h4' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:148 {{previous definition is here}}
+struct h5;
+// since-cxx20-error@-1 {{redefinition of 'h5' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:150 {{previous definition is here}}
+template <typename>
+class h6;
+// since-cxx20-error@-1 {{redefinition of 'h6' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:152 {{previous definition is here}}
+template <typename>
+void h7(int);
+// since-cxx20-error@-1 {{redefinition of 'h7' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:154 {{previous definition is here}}
+template <typename T>
+int h8<T, int>;
+// @-1 OK, partial specialization
+template <typename>
+using h9 = int;
+// since-cxx20-error@-1 {{redefinition of 'h9' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:158 {{previous definition is here}}
+template <typename>
+concept h10 = true;
+// since-cxx20-error@-1 {{redefinition of 'h10' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:160 {{previous definition is here}}
+
+
+// Part I: matching against `template <typename> using i = int;`
+// -------------------------------------------------------------
+
+int i1;
+// since-cxx20-error@-1 {{redefinition of 'i1' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:162 {{previous definition is here}}
+void i2();
+// since-cxx20-error@-1 {{redefinition of 'i2' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:164 {{previous definition is here}}
+enum Ei {
+  i3
+  // since-cxx20-error@-1 {{redefinition of 'i3'}}
+  //   since-cxx20-note@cwg1884_A.cppm:166 {{previous definition is here}}
+};
+namespace i4 {}
+// since-cxx20-error@-1 {{redefinition of 'i4' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:168 {{previous definition is here}}
+struct i5;
+// since-cxx20-error@-1 {{redefinition of 'i5' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:170 {{previous definition is here}}
+template <typename>
+class i6;
+// since-cxx20-error@-1 {{redefinition of 'i6' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:172 {{previous definition is here}}
+template <typename>
+void i7(int);
+// since-cxx20-error@-1 {{redefinition of 'i7' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:174 {{previous definition is here}}
+template <typename, typename>
+int i8;
+// since-cxx20-error@-1 {{redefinition of 'i8' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:176 {{previous definition is here}}
+template <typename T>
+int i9<T, int>;
+// since-cxx20-error@-1 {{no variable template matches partial specialization}}
+template <typename>
+concept i10 = true;
+// since-cxx20-error@-1 {{redefinition of 'i10' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:180 {{previous definition is here}}
+
+
+// Part J: matching against `template <typename> concept j = true;`
+// ----------------------------------------------------------------
+
+int j1;
+// since-cxx20-error@-1 {{redefinition of 'j1' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:184 {{previous definition is here}}
+void j2();
+// since-cxx20-error@-1 {{redefinition of 'j2' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:186 {{previous definition is here}}
+enum Ej {
+  j3
+  // since-cxx20-error@-1 {{redefinition of 'j3'}}
+  //   since-cxx20-note@cwg1884_A.cppm:188 {{previous definition is here}}
+};
+namespace j4 {}
+// since-cxx20-error@-1 {{redefinition of 'j4' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:190 {{previous definition is here}}
+struct j5;
+// since-cxx20-error@-1 {{redefinition of 'j5' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:192 {{previous definition is here}}
+template <typename>
+class j6;
+// since-cxx20-error@-1 {{redefinition of 'j6' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:194 {{previous definition is here}}
+template <typename>
+void j7(int);
+// since-cxx20-error@-1 {{redefinition of 'j7' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:196 {{previous definition is here}}
+template <typename, typename>
+int j8;
+// since-cxx20-error@-1 {{redefinition of 'j8' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:198 {{previous definition is here}}
+template <typename T>
+int j9<T, int>;
+// since-cxx20-error@-1 {{no variable template matches partial specialization}}
+template <typename>
+using j10 = int;
+// since-cxx20-error@-1 {{redefinition of 'j10' as different kind of symbol}}
+//   since-cxx20-note@cwg1884_A.cppm:202 {{previous definition is here}}
diff --git a/clang/test/CXX/drs/cwg18xx.cpp b/clang/test/CXX/drs/cwg18xx.cpp
index 7f0fb8cf589d48c..0fd2cd6b2d870c0 100644
--- a/clang/test/CXX/drs/cwg18xx.cpp
+++ b/clang/test/CXX/drs/cwg18xx.cpp
@@ -547,6 +547,8 @@ namespace cwg1881 { // cwg1881: 7
   static_assert(!__is_standard_layout(D), "");
 }
 
+// cwg1884 is in cwg1884.cpp
+
 namespace cwg1890 { // cwg1890: no drafting 2018-06-04
 // FIXME: current consensus for CWG2335 is that the examples are well-formed.
 namespace ex1 {
@@ -640,3 +642,86 @@ namespace H {
   struct S s;
 }
 }
+
+namespace cwg1898 { // cwg1898: 2.7
+void e(int) {} // #cwg1898-e
+void e(int) {}
+// expected-error@-1 {{redefinition of 'e'}}
+//   expected-note@#cwg1898-e {{previous definition is here}}
+
+void e2(int) {}
+void e2(long) {} // OK, different type
+
+void f(int) {} // #cwg1898-f
+void f(const int) {}
+// expected-error@-1 {{redefinition of 'f'}}
+//   expected-note@#cwg1898-f {{previous definition is here}}
+
+void g(int) {} // #cwg1898-g
+void g(volatile int) {}
+// since-cxx20-warning@-1 {{volatile-qualified parameter type 'volatile int' is deprecated}}
+// expected-error@-2 {{redefinition of 'g'}}
+//   expected-note@#cwg1898-g {{previous definition is here}}
+
+void h(int *) {} // #cwg1898-h
+void h(int[]) {}
+// expected-error@-1 {{redefinition of 'h'}}
+//   expected-note@#cwg1898-h {{previous definition is here}}
+
+void h2(int *) {} // #cwg1898-h2
+void h2(int[2]) {}
+// expected-error@-1 {{redefinition of 'h2'}}
+//   expected-note@#cwg1898-h2 {{previous definition is here}}
+
+void h3(int (*)[2]) {} // #cwg1898-h3
+void h3(int [3][2]) {}
+// expected-error@-1 {{redefinition of 'h3'}}
+//   expected-note@#cwg1898-h3 {{previous definition is here}}
+
+void h4(int (*)[2]) {}
+void h4(int [3][3]) {} // OK, differ in non-top-level extent of array
+
+void i(int *) {}
+void i(const int *) {} // OK, pointee cv-qualification is not discarded
+
+void i2(int *) {} // #cwg1898-i2
+void i2(int * const) {}
+// expected-error@-1 {{redefinition of 'i2'}}
+//   expected-note@#cwg1898-i2 {{previous definition is here}}
+
+void j(void(*)()) {} // #cwg1898-j
+void j(void()) {}
+// expected-error@-1 {{redefinition of 'j'}}
+//   expected-note@#cwg1898-j {{previous definition is here}}
+
+void j2(void(int)) {} // #cwg1898-j2
+void j2(void(const int)) {}
+// expected-error@-1 {{redefinition of 'j2'}}
+//   expected-note@#cwg1898-j2 {{previous definition is here}}
+
+struct A {
+  void k(int) {} // #cwg1898-k
+  void k(int) {}
+  // expected-error@-1 {{class member cannot be redeclared}}
+  //   expected-note@#cwg1898-k {{previous definition is here}}
+};
+
+struct B : A {
+  void k(int) {} // OK, shadows A::k
+};
+
+void l() {}
+void l(...) {}
+
+#if __cplusplus >= 201103L
+template <typename T>
+void m(T) {}
+template <typename... Ts>
+void m(Ts...) {}
+
+template <typename T, typename U>
+void m2(T, U) {}
+template <typename... Ts, typename U>
+void m2(Ts..., U) {}
+#endif
+} // namespace cwg1898
diff --git a/clang/test/CXX/drs/cwg279.cpp b/clang/test/CXX/drs/cwg279.cpp
new file mode 100644
index 000000000000000..3c63486cc0dd5ef
--- /dev/null
+++ b/clang/test/CXX/drs/cwg279.cpp
@@ -0,0 +1,53 @@
+// RUN: rm -rf %t
+// RUN: mkdir %t
+// RUN: split-file --leading-lines %s %t
+// RUN: %clang_cc1 -std=c++20 -pedantic-errors -fexceptions -fcxx-exceptions %t/cwg279_A.cppm -triple x86_64-unknown-unknown -emit-module-interface -o %t/cwg279_A.pcm
+// RUN: %clang_cc1 -std=c++20 -verify=since-cxx20 -pedantic-errors -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown %t/cwg279.cpp -fmodule-file=cwg279_A=%t/cwg279_A.pcm
+// RUN: %clang_cc1 -std=c++23 -pedantic-errors -fexceptions -fcxx-exceptions %t/cwg279_A.cppm -triple x86_64-unknown-unknown -emit-module-interface -o %t/cwg279_A.pcm
+// RUN: %clang_cc1 -std=c++23 -verify=since-cxx20 -pedantic-errors -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown %t/cwg279.cpp -fmodule-file=cwg279_A=%t/cwg279_A.pcm
+// RUN: %clang_cc1 -std=c++2c -pedantic-errors -fexceptions -fcxx-exceptions %t/cwg279_A.cppm -triple x86_64-unknown-unknown -emit-module-interface -o %t/cwg279_A.pcm
+// RUN: %clang_cc1 -std=c++2c -verify=since-cxx20 -pedantic-errors -fexceptions -fcxx-exceptions -triple x86_64-unknown-unknown %t/cwg279.cpp -fmodule-file=cwg279_A=%t/cwg279_A.pcm
+
+// cwg279: no
+
+//--- cwg279_A.cppm
+export module cwg279_A;
+
+export {
+struct S; // #cwg279-S
+extern S *q; // #cwg279-q
+
+struct S2 {}; // #cwg279-S2
+extern S2 *q2; // #cwg279-q2
+
+struct S3 {}; // #cwg279-S3
+extern S3 *q3; // #cwg279-q3
+} // export
+
+//--- cwg279.cpp
+import cwg279_A;
+
+// FIXME: We should use markers instead. They are less fragile,
+//        but -verify doesn't support them across modules yet.
+// FIXME: This is well-formed. Previous "definition" is actually just a declaration.
+typedef struct {} S;
+// since-cxx20-error@-1 {{typedef redefinition with different types ('struct S' vs 'S')}}
+//   since-cxx20-note@cwg279_A.cppm:17 {{previous definition is here}}
+extern S *q;
+// since-cxx20-error@-1 {{declaration of 'q' in the global module follows declaration in module cwg279_A}}
+//   since-cxx20-note@cwg279_A.cppm:18 {{previous declaration is here}}
+
+typedef struct {} S2;
+// since-cxx20-error@-1 {{typedef redefinition with different types ('struct S2' vs 'S2')}}
+//   since-cxx20-note@cwg279_A.cppm:20 {{previous definition is here}}
+extern S2 *q2;
+// since-cxx20-error@-1 {{declaration of 'q2' in the global module follows declaration in module cwg279_A}}
+//   since-cxx20-note@cwg279_A.cppm:21 {{previous declaration is here}}
+
+// FIXME: This is well-formed, because [basic.def.odr]/15 is satisfied.
+struct S3 {};
+// since-cxx20-error@-1 {{redefinition of 'S3'}}
+//   since-cxx20-note@cwg279_A.cppm:23 {{previous definition is here}}
+extern S3 *q3;
+// since-cxx20-error@-1 {{declaration of 'q3' in the global module follows declaration in module cwg279_A}}
+//   since-cxx20-note@cwg279_A.cppm:24 {{previous declaration is here}}
diff --git a/clang/test/CXX/drs/cwg2xx.cpp b/clang/test/CXX/drs/cwg2xx.cpp
index 926cb19596026b4..ec37b420880e28a 100644
--- a/clang/test/CXX/drs/cwg2xx.cpp
+++ b/clang/test/CXX/drs/cwg2xx.cpp
@@ -1032,6 +1032,8 @@ namespace cwg277 { // cwg277: 3.1
   static_assert(__enable_constant_folding(!intp()), "");
 }
 
+// cwg279 is in cwg279.cpp
+
 namespace cwg280 { // cwg280: 2.9
   typedef void f0();
   typedef void f1(int);
diff --git a/clang/test/CXX/drs/cwg3xx.cpp b/clang/test/CXX/drs/cwg3xx.cpp
index f20054c3701b1ce..10c8d86ed16a0db 100644
--- a/clang/test/CXX/drs/cwg3xx.cpp
+++ b/clang/test/CXX/drs/cwg3xx.cpp
@@ -637,6 +637,8 @@ namespace cwg337 { // cwg337: yes
   struct B { virtual ~B() = 0; };
 }
 
+// cwg338: dup 1884
+
 namespace cwg339 { // cwg339: 2.8
   template <int I> struct A { static const int value = I; };
 
diff --git a/clang/test/ClangScanDeps/link-libraries.c b/clang/test/ClangScanDeps/link-libraries.c
index c09691d2356efcc..bc0b0c546ea032d 100644
--- a/clang/test/ClangScanDeps/link-libraries.c
+++ b/clang/test/ClangScanDeps/link-libraries.c
@@ -39,14 +39,13 @@ module transitive {
 // CHECK-NEXT:   "modules": [
 // CHECK-NEXT:     {
 // CHECK-NEXT:       "clang-module-deps": [],
-// CHECK-NEXT:       "clang-modulemap-file": "{{.*}}/__inferred_module.map",
+// CHECK-NEXT:       "clang-modulemap-file": "[[PREFIX]]/Inputs/frameworks/module.modulemap",
 // CHECK-NEXT:       "command-line": [
 // CHECK:            ],
 // CHECK-NEXT:       "context-hash": "{{.*}}",
 // CHECK-NEXT:       "file-deps": [
-// CHECK-NEXT:         "{{.*}}/Framework.h"
-// CHECK-NEXT:         "{{.*}}/__inferred_module.map"
-// CHECK-NEXT:         "{{.*}}/module.modulemap"
+// CHECK-NEXT:         "[[PREFIX]]/Inputs/frameworks/Framework.framework/Headers/Framework.h"
+// CHECK-NEXT:         "[[PREFIX]]/Inputs/frameworks/module.modulemap"
 // CHECK-NEXT:       ],
 // CHECK-NEXT:       "link-libraries": [
 // CHECK-NEXT:         {
diff --git a/clang/test/ClangScanDeps/print-timing.c b/clang/test/ClangScanDeps/print-timing.c
index f27df1ebf732a9c..fa2a433b9553708 100644
--- a/clang/test/ClangScanDeps/print-timing.c
+++ b/clang/test/ClangScanDeps/print-timing.c
@@ -3,7 +3,8 @@
 
 // RUN: clang-scan-deps -compilation-database %t/cdb.json -print-timing > %t/result.json 2>%t/errs
 // RUN: cat %t/errs | FileCheck %s
-// CHECK: clang-scan-deps timing: {{[0-9]+}}.{{[0-9][0-9]}}s wall, {{[0-9]+}}.{{[0-9][0-9]}}s process
+// CHECK:      wall time [s]              process time [s]           instruction count
+// CHECK-NEXT: {{[0-9]+}}.{{([0-9]{4})}}  {{[0-9]+}}.{{([0-9]{4})}}  {{[0-9]+}}
 
 //--- cdb.json
 []
diff --git a/clang/test/CodeGen/RISCV/attr-hw-shadow-stack.c b/clang/test/CodeGen/RISCV/attr-hw-shadow-stack.c
new file mode 100644
index 000000000000000..cabff7e598eb02a
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/attr-hw-shadow-stack.c
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zicfiss -emit-llvm -o - %s -fcf-protection=return | FileCheck %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-zicfiss -emit-llvm -o - %s | FileCheck -check-prefix=NOSHADOWSTACK %s
+// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-zicfiss -emit-llvm -o - %s -fcf-protection=return | FileCheck %s
+// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-zicfiss -emit-llvm -o - %s | FileCheck -check-prefix=NOSHADOWSTACK %s
+
+int foo(int *a) { return *a; }
+
+// CHECK: attributes {{.*}}"hw-shadow-stack"{{.*}}
+// NOSHADOWSTACK-NOT: attributes {{.*}}"hw-shadow-stack"{{.*}}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 9d6c1897f540d38..4e56204c8ad40fa 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -11,6 +11,7 @@
 
 
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 // NOTE: This should match the tests in llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
 
@@ -1740,18 +1741,21 @@ __m256d test_mm256_setzero_pd(void) {
   // CHECK: store <4 x double> zeroinitializer
   return _mm256_setzero_pd();
 }
+TEST_CONSTEXPR(match_m256d(_mm256_setzero_pd(), +0.0, +0.0, +0.0, +0.0));
 
 __m256 test_mm256_setzero_ps(void) {
   // CHECK-LABEL: test_mm256_setzero_ps
   // CHECK: store <8 x float> zeroinitializer
   return _mm256_setzero_ps();
 }
+TEST_CONSTEXPR(match_m256(_mm256_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f));
 
 __m256i test_mm256_setzero_si256(void) {
   // CHECK-LABEL: test_mm256_setzero_si256
   // CHECK: store <4 x i64> zeroinitializer
   return _mm256_setzero_si256();
 }
+TEST_CONSTEXPR(match_m256i(_mm256_setzero_si256(), 0, 0, 0, 0));
 
 __m256d test_mm256_shuffle_pd(__m256d A, __m256d B) {
   // CHECK-LABEL: test_mm256_shuffle_pd
@@ -2097,19 +2101,3 @@ float test_mm256_cvtss_f32(__m256 __a)
   // CHECK: extractelement <8 x float> %{{.*}}, i32 0
   return _mm256_cvtss_f32(__a);
 }
-
-// Test constexpr handling.
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-
-void test_constexpr() {
-  constexpr __m256d v_mm256_setzero_pd = _mm256_setzero_pd();
-  static_assert(v_mm256_setzero_pd[0] == +0.0 && v_mm256_setzero_pd[1] == +0.0 && v_mm256_setzero_pd[2] == +0.0 && v_mm256_setzero_pd[3] == +0.0);
-
-  constexpr __m256 v_mm256_setzero_ps = _mm256_setzero_ps();
-  static_assert(v_mm256_setzero_ps[0] == +0.0f && v_mm256_setzero_ps[1] == +0.0f && v_mm256_setzero_ps[2] == +0.0f && v_mm256_setzero_ps[3] == +0.0f && v_mm256_setzero_ps[4] == +0.0f && v_mm256_setzero_ps[5] == +0.0f && v_mm256_setzero_ps[6] == +0.0f && v_mm256_setzero_ps[7] == +0.0f);
-
-  constexpr __m256i v_mm256_setzero_si256 = _mm256_setzero_si256();
-  static_assert(v_mm256_setzero_si256[0] == 0x0000000000000000ULL && v_mm256_setzero_si256[1] == 0x0000000000000000ULL && v_mm256_setzero_si256[2] == 0x0000000000000000ULL && v_mm256_setzero_si256[3] == 0x0000000000000000ULL);
-}
-
-#endif
diff --git a/clang/test/CodeGen/X86/avx-cmp-builtins.c b/clang/test/CodeGen/X86/avx-cmp-builtins.c
index c4e3c7ccd54988d..2e4a383a6b3fca1 100644
--- a/clang/test/CodeGen/X86/avx-cmp-builtins.c
+++ b/clang/test/CodeGen/X86/avx-cmp-builtins.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -O3 -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s
-// FIXME: The shufflevector instructions in test_cmpgt_sd are relying on O3 here.
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=i386-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s
 
 
 #include <immintrin.h>
@@ -9,62 +9,124 @@
 // Test LLVM IR codegen of cmpXY instructions
 //
 
+// CHECK-LABEL: define dso_local <2 x double> @test_cmp_sd(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[A]], <2 x double> [[B]], i8 13)
+// CHECK-NEXT:    ret <2 x double> [[TMP0]]
+//
 __m128d test_cmp_sd(__m128d a, __m128d b) {
   // Expects that the third argument in LLVM IR is immediate expression
-  // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 13)
   return _mm_cmp_sd(a, b, _CMP_GE_OS);
 }
 
+// CHECK-LABEL: define dso_local <4 x float> @test_cmp_ss(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[A]], <4 x float> [[B]], i8 13)
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
 __m128 test_cmp_ss(__m128 a, __m128 b) {
   // Expects that the third argument in LLVM IR is immediate expression
-  // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 13)
   return _mm_cmp_ss(a, b, _CMP_GE_OS);
 }
 
+// CHECK-LABEL: define dso_local <4 x float> @test_cmpgt_ss(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 1)
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x float> [[SHUFFLE_I]]
+//
 __m128 test_cmpgt_ss(__m128 a, __m128 b) {
-  // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 1)
-  // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   return _mm_cmpgt_ss(a, b);
 }
 
+// CHECK-LABEL: define dso_local <4 x float> @test_cmpge_ss(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 2)
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x float> [[SHUFFLE_I]]
+//
 __m128 test_cmpge_ss(__m128 a, __m128 b) {
-  // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 2)
-  // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   return _mm_cmpge_ss(a, b);
 }
 
+// CHECK-LABEL: define dso_local <4 x float> @test_cmpngt_ss(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 5)
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x float> [[SHUFFLE_I]]
+//
 __m128 test_cmpngt_ss(__m128 a, __m128 b) {
-  // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 5)
-  // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   return _mm_cmpngt_ss(a, b);
 }
 
+// CHECK-LABEL: define dso_local <4 x float> @test_cmpnge_ss(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[B]], <4 x float> [[A]], i8 6)
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[TMP0]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x float> [[SHUFFLE_I]]
+//
 __m128 test_cmpnge_ss(__m128 a, __m128 b) {
-  // CHECK: @llvm.x86.sse.cmp.ss({{.*}}, i8 6)
-  // CHECK: shufflevector <{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   return _mm_cmpnge_ss(a, b);
 }
 
+// CHECK-LABEL: define dso_local <2 x double> @test_cmpgt_sd(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 1)
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1
+// CHECK-NEXT:    ret <2 x double> [[VECINIT2_I]]
+//
 __m128d test_cmpgt_sd(__m128d a, __m128d b) {
-  // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 1)
-  // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3>
   return _mm_cmpgt_sd(a, b);
 }
 
+// CHECK-LABEL: define dso_local <2 x double> @test_cmpge_sd(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 2)
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1
+// CHECK-NEXT:    ret <2 x double> [[VECINIT2_I]]
+//
 __m128d test_cmpge_sd(__m128d a, __m128d b) {
-  // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 2)
-  // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3>
   return _mm_cmpge_sd(a, b);
 }
 
+// CHECK-LABEL: define dso_local <2 x double> @test_cmpngt_sd(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 5)
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1
+// CHECK-NEXT:    ret <2 x double> [[VECINIT2_I]]
+//
 __m128d test_cmpngt_sd(__m128d a, __m128d b) {
-  // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 5)
-  // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3>
   return _mm_cmpngt_sd(a, b);
 }
 
+// CHECK-LABEL: define dso_local <2 x double> @test_cmpnge_sd(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[B]], <2 x double> [[A]], i8 6)
+// CHECK-NEXT:    [[VECEXT_I:%.*]] = extractelement <2 x double> [[TMP0]], i32 0
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <2 x double> poison, double [[VECEXT_I]], i32 0
+// CHECK-NEXT:    [[VECEXT1_I:%.*]] = extractelement <2 x double> [[A]], i32 1
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double [[VECEXT1_I]], i32 1
+// CHECK-NEXT:    ret <2 x double> [[VECINIT2_I]]
+//
 __m128d test_cmpnge_sd(__m128d a, __m128d b) {
-  // CHECK: @llvm.x86.sse2.cmp.sd({{.*}}, i8 6)
-  // CHECK: shufflevector <{{.*}}, <2 x i32> <i32 0, i32 3>
   return _mm_cmpnge_sd(a, b);
 }
diff --git a/clang/test/CodeGen/X86/avx-shuffle-builtins.c b/clang/test/CodeGen/X86/avx-shuffle-builtins.c
index d184d28f3e07aa9..1c05fa436983ed7 100644
--- a/clang/test/CodeGen/X86/avx-shuffle-builtins.c
+++ b/clang/test/CodeGen/X86/avx-shuffle-builtins.c
@@ -1,7 +1,7 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // REQUIRES: x86-registered-target
-// RUN: %clang_cc1 -ffreestanding %s -O3 -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X64
-// RUN: %clang_cc1 -ffreestanding %s -O3 -triple=i386-apple-darwin -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,X86
-// FIXME: This is testing optimized generation of shuffle instructions and should be fixed.
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s
+// RUN: %clang_cc1 -ffreestanding %s -triple=i386-- -target-feature +avx -disable-O0-optnone -emit-llvm -o - | opt -S -passes=mem2reg | FileCheck %s
 
 
 #include <immintrin.h>
@@ -10,201 +10,341 @@
 // Test LLVM IR codegen of shuffle instructions, checking if the masks are correct
 //
 
+// CHECK-LABEL: define dso_local <8 x float> @x(
+// CHECK-SAME: <8 x float> noundef [[A:%.*]], <8 x float> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFP:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+// CHECK-NEXT:    ret <8 x float> [[SHUFP]]
+//
 __m256 x(__m256 a, __m256 b) {
-  // CHECK-LABEL: x
-  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
   return _mm256_shuffle_ps(a, b, 203);
 }
 
+// CHECK-LABEL: define dso_local <2 x double> @test_mm_permute_pd(
+// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PERMIL:%.*]] = shufflevector <2 x double> [[A]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+// CHECK-NEXT:    ret <2 x double> [[PERMIL]]
+//
 __m128d test_mm_permute_pd(__m128d a) {
-  // CHECK-LABEL: test_mm_permute_pd
-  // CHECK: shufflevector{{.*}}<i32 1, i32 0>
   return _mm_permute_pd(a, 1);
 }
 
+// CHECK-LABEL: define dso_local <4 x double> @test_mm256_permute_pd(
+// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PERMIL:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+// CHECK-NEXT:    ret <4 x double> [[PERMIL]]
+//
 __m256d test_mm256_permute_pd(__m256d a) {
-  // CHECK-LABEL: test_mm256_permute_pd
-  // CHECK: shufflevector{{.*}}<i32 1, i32 0, i32 3, i32 2>
   return _mm256_permute_pd(a, 5);
 }
 
+// CHECK-LABEL: define dso_local <4 x float> @test_mm_permute_ps(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PERMIL:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT:    ret <4 x float> [[PERMIL]]
+//
 __m128 test_mm_permute_ps(__m128 a) {
-  // CHECK-LABEL: test_mm_permute_ps
-  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 1, i32 0>
   return _mm_permute_ps(a, 0x1b);
 }
 
-// Test case for PR12401
+// CHECK-LABEL: define dso_local <4 x float> @test_mm_permute_ps2(
+// CHECK-SAME: <4 x float> noundef [[A:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PERMIL:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 2, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x float> [[PERMIL]]
+//
 __m128 test_mm_permute_ps2(__m128 a) {
-  // CHECK-LABEL: test_mm_permute_ps2
-  // CHECK: shufflevector{{.*}}<i32 2, i32 1, i32 2, i32 3>
   return _mm_permute_ps(a, 0xe6);
 }
 
+// CHECK-LABEL: define dso_local <8 x float> @test_mm256_permute_ps(
+// CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[PERMIL:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+// CHECK-NEXT:    ret <8 x float> [[PERMIL]]
+//
 __m256 test_mm256_permute_ps(__m256 a) {
-  // CHECK-LABEL: test_mm256_permute_ps
-  // CHECK: shufflevector{{.*}}<i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   return _mm256_permute_ps(a, 0x1b);
 }
 
+// CHECK-LABEL: define dso_local <4 x double> @test_mm256_permute2f128_pd(
+// CHECK-SAME: <4 x double> noundef [[A:%.*]], <4 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VPERM:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+// CHECK-NEXT:    ret <4 x double> [[VPERM]]
+//
 __m256d test_mm256_permute2f128_pd(__m256d a, __m256d b) {
-  // CHECK-LABEL: test_mm256_permute2f128_pd
-  // CHECK: shufflevector{{.*}}<i32 2, i32 3, i32 6, i32 7> 
   return _mm256_permute2f128_pd(a, b, 0x31);
 }
 
+// CHECK-LABEL: define dso_local <8 x float> @test_mm256_permute2f128_ps(
+// CHECK-SAME: <8 x float> noundef [[A:%.*]], <8 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VPERM:%.*]] = shufflevector <8 x float> [[B]], <8 x float> [[A]], <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT:    ret <8 x float> [[VPERM]]
+//
 __m256 test_mm256_permute2f128_ps(__m256 a, __m256 b) {
-  // CHECK-LABEL: test_mm256_permute2f128_ps
-  // CHECK: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
   return _mm256_permute2f128_ps(a, b, 0x13);
 }
 
+// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_permute2f128_si256(
+// CHECK-SAME: <4 x i64> noundef [[A:%.*]], <4 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i64> [[B]] to <8 x i32>
+// CHECK-NEXT:    [[VPERM:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i32> [[VPERM]] to <4 x i64>
+// CHECK-NEXT:    ret <4 x i64> [[TMP2]]
+//
 __m256i test_mm256_permute2f128_si256(__m256i a, __m256i b) {
-  // CHECK-LABEL: test_mm256_permute2f128_si256
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
   return _mm256_permute2f128_si256(a, b, 0x20);
 }
 
-__m128
-test_mm_broadcast_ss(float const *__a) {
-  // CHECK-LABEL: test_mm_broadcast_ss
-  // CHECK: insertelement <4 x float> {{.*}}, i64 0
-  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> poison, <4 x i32> zeroinitializer
+// CHECK-LABEL: define dso_local <4 x float> @test_mm_broadcast_ss(
+// CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[__A]], align 1
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float [[TMP0]], i32 1
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float [[TMP0]], i32 2
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <4 x float> [[VECINIT3_I]], float [[TMP0]], i32 3
+// CHECK-NEXT:    ret <4 x float> [[VECINIT4_I]]
+//
+__m128 test_mm_broadcast_ss(float const *__a) {
   return _mm_broadcast_ss(__a);
 }
 
-__m256d
-test_mm256_broadcast_sd(double const *__a) {
-  // CHECK-LABEL: test_mm256_broadcast_sd
-  // CHECK: insertelement <4 x double> {{.*}}, i64 0
-  // CHECK: shufflevector <4 x double> {{.*}}, <4 x double> poison, <4 x i32> zeroinitializer
+// CHECK-LABEL: define dso_local <4 x double> @test_mm256_broadcast_sd(
+// CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[__A]], align 1
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i32 0
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <4 x double> [[VECINIT_I]], double [[TMP0]], i32 1
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <4 x double> [[VECINIT2_I]], double [[TMP0]], i32 2
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <4 x double> [[VECINIT3_I]], double [[TMP0]], i32 3
+// CHECK-NEXT:    ret <4 x double> [[VECINIT4_I]]
+//
+__m256d test_mm256_broadcast_sd(double const *__a) {
   return _mm256_broadcast_sd(__a);
 }
 
-__m256
-test_mm256_broadcast_ss(float const *__a) {
-  // CHECK-LABEL: test_mm256_broadcast_ss
-  // CHECK: insertelement <8 x float> {{.*}}, i64 0
-  // CHECK: shufflevector <8 x float> {{.*}}, <8 x float> poison, <8 x i32> zeroinitializer
+// CHECK-LABEL: define dso_local <8 x float> @test_mm256_broadcast_ss(
+// CHECK-SAME: ptr noundef [[__A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[__A]], align 1
+// CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x float> poison, float [[TMP0]], i32 0
+// CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP0]], i32 1
+// CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP0]], i32 2
+// CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP0]], i32 3
+// CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP0]], i32 4
+// CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP0]], i32 5
+// CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP0]], i32 6
+// CHECK-NEXT:    [[VECINIT8_I:%.*]] = insertelement <8 x float> [[VECINIT7_I]], float [[TMP0]], i32 7
+// CHECK-NEXT:    ret <8 x float> [[VECINIT8_I]]
+//
+__m256 test_mm256_broadcast_ss(float const *__a) {
   return _mm256_broadcast_ss(__a);
 }
 
 // Make sure we have the correct mask for each insertf128 case.
 
+// CHECK-LABEL: define dso_local <8 x float> @test_mm256_insertf128_ps_0(
+// CHECK-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    ret <8 x float> [[INSERT]]
+//
 __m256 test_mm256_insertf128_ps_0(__m256 a, __m128 b) {
-  // CHECK-LABEL: test_mm256_insertf128_ps_0
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
   return _mm256_insertf128_ps(a, b, 0);
 }
 
+// CHECK-LABEL: define dso_local <4 x double> @test_mm256_insertf128_pd_0(
+// CHECK-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x double> [[INSERT]]
+//
 __m256d test_mm256_insertf128_pd_0(__m256d a, __m128d b) {
-  // CHECK-LABEL: test_mm256_insertf128_pd_0
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 6, i32 7>
   return _mm256_insertf128_pd(a, b, 0);
 }
 
+// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_insertf128_si256_0(
+// CHECK-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32>
+// CHECK-NEXT:    [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
+// CHECK-NEXT:    ret <4 x i64> [[TMP2]]
+//
 __m256i test_mm256_insertf128_si256_0(__m256i a, __m128i b) {
-  // CHECK-LABEL: test_mm256_insertf128_si256_0
-  // X64: shufflevector{{.*}}<i32 0, i32 1, i32 6, i32 7>
-  // X86: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
   return _mm256_insertf128_si256(a, b, 0);
 }
 
+// CHECK-LABEL: define dso_local <8 x float> @test_mm256_insertf128_ps_1(
+// CHECK-SAME: <8 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[WIDEN:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    [[INSERT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT:    ret <8 x float> [[INSERT]]
+//
 __m256 test_mm256_insertf128_ps_1(__m256 a, __m128 b) {
-  // CHECK-LABEL: test_mm256_insertf128_ps_1
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   return _mm256_insertf128_ps(a, b, 1);
 }
 
+// CHECK-LABEL: define dso_local <4 x double> @test_mm256_insertf128_pd_1(
+// CHECK-SAME: <4 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[WIDEN:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    [[INSERT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[WIDEN]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+// CHECK-NEXT:    ret <4 x double> [[INSERT]]
+//
 __m256d test_mm256_insertf128_pd_1(__m256d a, __m128d b) {
-  // CHECK-LABEL: test_mm256_insertf128_pd_1
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
   return _mm256_insertf128_pd(a, b, 1);
 }
 
+// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_insertf128_si256_1(
+// CHECK-SAME: <4 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32>
+// CHECK-NEXT:    [[WIDEN:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    [[INSERT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> [[WIDEN]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i32> [[INSERT]] to <4 x i64>
+// CHECK-NEXT:    ret <4 x i64> [[TMP2]]
+//
 __m256i test_mm256_insertf128_si256_1(__m256i a, __m128i b) {
-  // CHECK-LABEL: test_mm256_insertf128_si256_1
-  // X64: shufflevector{{.*}}<i32 0, i32 1, i32 4, i32 5>
-  // X86: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   return _mm256_insertf128_si256(a, b, 1);
 }
 
 // Make sure we have the correct mask for each extractf128 case.
 
+// CHECK-LABEL: define dso_local <4 x float> @test_mm256_extractf128_ps_0(
+// CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x float> [[EXTRACT]]
+//
 __m128 test_mm256_extractf128_ps_0(__m256 a) {
-  // X64-LABEL: test_mm256_extractf128_ps_0
-  // X64: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
-  //
-  // X86-LABEL: test_mm256_extractf128_ps_0
-  // X86: shufflevector{{.*}}<i32 0, i32 1>
   return _mm256_extractf128_ps(a, 0);
 }
 
+// CHECK-LABEL: define dso_local <2 x double> @test_mm256_extractf128_pd_0(
+// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT:    ret <2 x double> [[EXTRACT]]
+//
 __m128d test_mm256_extractf128_pd_0(__m256d a) {
-  // CHECK-LABEL: test_mm256_extractf128_pd_0
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1>
   return _mm256_extractf128_pd(a, 0);
 }
 
+// CHECK-LABEL: define dso_local <2 x i64> @test_mm256_extractf128_si256_0(
+// CHECK-SAME: <4 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
+// CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[EXTRACT]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
 __m128i test_mm256_extractf128_si256_0(__m256i a) {
-  // CHECK-LABEL: test_mm256_extractf128_si256_0
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1>
   return _mm256_extractf128_si256(a, 0);
 }
 
+// CHECK-LABEL: define dso_local <4 x float> @test_mm256_extractf128_ps_1(
+// CHECK-SAME: <8 x float> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x float> [[A]], <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    ret <4 x float> [[EXTRACT]]
+//
 __m128 test_mm256_extractf128_ps_1(__m256 a) {
-  // X64-LABEL: test_mm256_extractf128_ps_1
-  // X64: shufflevector{{.*}}<i32 4, i32 5, i32 6, i32 7>
-  //
-  // X86-LABEL: test_mm256_extractf128_ps_1
-  // X86: shufflevector{{.*}}<i32 2, i32 3>
   return _mm256_extractf128_ps(a, 1);
 }
 
+// CHECK-LABEL: define dso_local <2 x double> @test_mm256_extractf128_pd_1(
+// CHECK-SAME: <4 x double> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <2 x i32> <i32 2, i32 3>
+// CHECK-NEXT:    ret <2 x double> [[EXTRACT]]
+//
 __m128d test_mm256_extractf128_pd_1(__m256d a) {
-  // CHECK-LABEL: test_mm256_extractf128_pd_1
-  // CHECK: shufflevector{{.*}}<i32 2, i32 3>
   return _mm256_extractf128_pd(a, 1);
 }
 
+// CHECK-LABEL: define dso_local <2 x i64> @test_mm256_extractf128_si256_1(
+// CHECK-SAME: <4 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i64> [[A]] to <8 x i32>
+// CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[EXTRACT]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
 __m128i test_mm256_extractf128_si256_1(__m256i a) {
-  // CHECK-LABEL: test_mm256_extractf128_si256_1
-  // CHECK: shufflevector{{.*}}<i32 2, i32 3>
   return _mm256_extractf128_si256(a, 1);
 }
 
+// CHECK-LABEL: define dso_local <8 x float> @test_mm256_set_m128(
+// CHECK-SAME: <4 x float> noundef [[HI:%.*]], <4 x float> noundef [[LO:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <4 x float> [[LO]], <4 x float> [[HI]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    ret <8 x float> [[SHUFFLE_I]]
+//
 __m256 test_mm256_set_m128(__m128 hi, __m128 lo) {
-  // CHECK-LABEL: test_mm256_set_m128
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm256_set_m128(hi, lo);
 }
 
+// CHECK-LABEL: define dso_local <4 x double> @test_mm256_set_m128d(
+// CHECK-SAME: <2 x double> noundef [[HI:%.*]], <2 x double> noundef [[LO:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <2 x double> [[LO]], <2 x double> [[HI]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x double> [[SHUFFLE_I]]
+//
 __m256d test_mm256_set_m128d(__m128d hi, __m128d lo) {
-  // CHECK-LABEL: test_mm256_set_m128d
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
   return _mm256_set_m128d(hi, lo);
 }
 
+// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_set_m128i(
+// CHECK-SAME: <2 x i64> noundef [[HI:%.*]], <2 x i64> noundef [[LO:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> [[LO]], <2 x i64> [[HI]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x i64> [[SHUFFLE_I]]
+//
 __m256i test_mm256_set_m128i(__m128i hi, __m128i lo) {
-  // CHECK-LABEL: test_mm256_set_m128i
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>  
   return _mm256_set_m128i(hi, lo);
 }
 
+// CHECK-LABEL: define dso_local <8 x float> @test_mm256_setr_m128(
+// CHECK-SAME: <4 x float> noundef [[HI:%.*]], <4 x float> noundef [[LO:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFFLE_I_I:%.*]] = shufflevector <4 x float> [[LO]], <4 x float> [[HI]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+// CHECK-NEXT:    ret <8 x float> [[SHUFFLE_I_I]]
+//
 __m256 test_mm256_setr_m128(__m128 hi, __m128 lo) {
-  // CHECK-LABEL: test_mm256_setr_m128
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm256_setr_m128(lo, hi);
 }
 
+// CHECK-LABEL: define dso_local <4 x double> @test_mm256_setr_m128d(
+// CHECK-SAME: <2 x double> noundef [[HI:%.*]], <2 x double> noundef [[LO:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFFLE_I_I:%.*]] = shufflevector <2 x double> [[LO]], <2 x double> [[HI]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x double> [[SHUFFLE_I_I]]
+//
 __m256d test_mm256_setr_m128d(__m128d hi, __m128d lo) {
-  // CHECK-LABEL: test_mm256_setr_m128d
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
   return _mm256_setr_m128d(lo, hi);
 }
 
+// CHECK-LABEL: define dso_local <4 x i64> @test_mm256_setr_m128i(
+// CHECK-SAME: <2 x i64> noundef [[HI:%.*]], <2 x i64> noundef [[LO:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> [[LO]], <2 x i64> [[HI]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+// CHECK-NEXT:    ret <4 x i64> [[SHUFFLE_I_I]]
+//
 __m256i test_mm256_setr_m128i(__m128i hi, __m128i lo) {
-  // CHECK-LABEL: test_mm256_setr_m128i
-  // CHECK: shufflevector{{.*}}<i32 0, i32 1, i32 2, i32 3>
   return _mm256_setr_m128i(lo, hi);
 }
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 0b4f778a0637ab7..372790a8cd668be 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -4,6 +4,7 @@
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -fms-extensions -fms-compatibility -ffreestanding %s -triple=x86_64-windows-msvc -target-feature +avx512f -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s
 
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 __m512d test_mm512_sqrt_pd(__m512d a)
 {
@@ -10615,13 +10616,13 @@ __m128 test_mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B) {
   return _mm_maskz_cvtsd_ss(__U, __A, __B); 
 }
 
-
 __m512i test_mm512_setzero_epi32(void)
 {
   // CHECK-LABEL: test_mm512_setzero_epi32
   // CHECK: zeroinitializer
   return _mm512_setzero_epi32();
 }
+TEST_CONSTEXPR(match_m512i(_mm512_setzero_epi32(), 0, 0, 0, 0, 0, 0, 0, 0));
 
 __m512 test_mm512_setzero(void)
 {
@@ -10629,6 +10630,7 @@ __m512 test_mm512_setzero(void)
   // CHECK: zeroinitializer
   return _mm512_setzero();
 }
+TEST_CONSTEXPR(match_m512(_mm512_setzero(), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f));
 
 __m512i test_mm512_setzero_si512(void)
 {
@@ -10636,6 +10638,7 @@ __m512i test_mm512_setzero_si512(void)
   // CHECK: zeroinitializer
   return _mm512_setzero_si512();
 }
+TEST_CONSTEXPR(match_m512i(_mm512_setzero_si512(), 0, 0, 0, 0, 0, 0, 0, 0));
 
 __m512 test_mm512_setzero_ps(void)
 {
@@ -10643,6 +10646,7 @@ __m512 test_mm512_setzero_ps(void)
   // CHECK: zeroinitializer
   return _mm512_setzero_ps();
 }
+TEST_CONSTEXPR(match_m512(_mm512_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f, +0.0f));
 
 __m512d test_mm512_setzero_pd(void)
 {
@@ -10650,6 +10654,7 @@ __m512d test_mm512_setzero_pd(void)
   // CHECK: zeroinitializer
   return _mm512_setzero_pd();
 }
+TEST_CONSTEXPR(match_m512d(_mm512_setzero_pd(), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0));
 
 __mmask16 test_mm512_int2mask(int __a)
 {
@@ -10880,25 +10885,3 @@ void test_mm512_mask_i32loscatter_epi64(void *__addr, __mmask8 __mask, __m512i _
   // CHECK: @llvm.x86.avx512.mask.scatter.dpq.512
   _mm512_mask_i32loscatter_epi64(__addr, __mask, __index, __v1, 2);
 }
-
-// Test constexpr handling.
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-
-void test_constexpr() {
-  constexpr __m512 v_mm512_setzero = _mm512_setzero();
-  static_assert(v_mm512_setzero[0] == +0.0f && v_mm512_setzero[1] == +0.0f && v_mm512_setzero[2] == +0.0f && v_mm512_setzero[3] == +0.0f && v_mm512_setzero[4] == +0.0f && v_mm512_setzero[5] == +0.0f && v_mm512_setzero[6] == +0.0f && v_mm512_setzero[7] == +0.0f && v_mm512_setzero[8] == +0.0f && v_mm512_setzero[9] == +0.0f && v_mm512_setzero[10] == +0.0f && v_mm512_setzero[11] == +0.0f && v_mm512_setzero[12] == +0.0f && v_mm512_setzero[13] == +0.0f && v_mm512_setzero[14] == +0.0f && v_mm512_setzero[15] == +0.0f);
-
-  constexpr __m512 v_mm512_setzero_ps = _mm512_setzero_ps();
-  static_assert(v_mm512_setzero_ps[0] == +0.0f && v_mm512_setzero_ps[1] == +0.0f && v_mm512_setzero_ps[2] == +0.0f && v_mm512_setzero_ps[3] == +0.0f && v_mm512_setzero_ps[4] == +0.0f && v_mm512_setzero_ps[5] == +0.0f && v_mm512_setzero_ps[6] == +0.0f && v_mm512_setzero_ps[7] == +0.0f && v_mm512_setzero_ps[8] == +0.0f && v_mm512_setzero_ps[9] == +0.0f && v_mm512_setzero_ps[10] == +0.0f && v_mm512_setzero_ps[11] == +0.0f && v_mm512_setzero_ps[12] == +0.0f && v_mm512_setzero_ps[13] == +0.0f && v_mm512_setzero_ps[14] == +0.0f && v_mm512_setzero_ps[15] == +0.0f);
-
-  constexpr __m512d v_mm512_setzero_pd = _mm512_setzero_pd();
-  static_assert(v_mm512_setzero_pd[0] == +0.0 && v_mm512_setzero_pd[1] == +0.0 && v_mm512_setzero_pd[2] == +0.0 && v_mm512_setzero_pd[3] == +0.0 && v_mm512_setzero_pd[4] == +0.0 && v_mm512_setzero_pd[5] == +0.0 && v_mm512_setzero_pd[6] == +0.0 && v_mm512_setzero_pd[7] == +0.0);
-
-  constexpr __m512i v_mm512_setzero_si512 = _mm512_setzero_si512();
-  static_assert(v_mm512_setzero_si512[0] == 0x0000000000000000ULL && v_mm512_setzero_si512[1] == 0x0000000000000000ULL && v_mm512_setzero_si512[2] == 0x0000000000000000ULL && v_mm512_setzero_si512[3] == 0x0000000000000000ULL && v_mm512_setzero_si512[4] == 0x0000000000000000ULL && v_mm512_setzero_si512[5] == 0x0000000000000000ULL && v_mm512_setzero_si512[6] == 0x0000000000000000ULL && v_mm512_setzero_si512[7] == 0x0000000000000000ULL);
-
-  constexpr __m512i v_mm512_setzero_epi32 = _mm512_setzero_epi32();
-  static_assert(v_mm512_setzero_epi32[0] == 0x0000000000000000ULL && v_mm512_setzero_epi32[1] == 0x0000000000000000ULL && v_mm512_setzero_epi32[2] == 0x0000000000000000ULL && v_mm512_setzero_epi32[3] == 0x0000000000000000ULL && v_mm512_setzero_epi32[4] == 0x0000000000000000ULL && v_mm512_setzero_epi32[5] == 0x0000000000000000ULL && v_mm512_setzero_epi32[6] == 0x0000000000000000ULL && v_mm512_setzero_epi32[7] == 0x0000000000000000ULL);
-}
-
-#endif
diff --git a/clang/test/CodeGen/X86/builtin_test_helpers.h b/clang/test/CodeGen/X86/builtin_test_helpers.h
new file mode 100644
index 000000000000000..01800db33afbb9b
--- /dev/null
+++ b/clang/test/CodeGen/X86/builtin_test_helpers.h
@@ -0,0 +1,68 @@
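+/* Helper methods for builtin intrinsic tests: constexpr element-wise vector matchers plus the TEST_CONSTEXPR macro (a static_assert in C++11 and later, empty otherwise). */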
+
+#include <immintrin.h>
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+
+constexpr bool match_m128(__m128 v, float a, float b, float c, float d) {
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
+constexpr bool match_m128d(__m128d v, double a, double b) {
+  return v[0] == a && v[1] == b;
+}
+
+constexpr bool match_m128i(__m128i v, unsigned long long a, unsigned long long b) {
+  return v[0] == a && v[1] == b;
+}
+
+constexpr bool match_v2di(__m128i v, long long a, long long b) {
+  return v[0] == a && v[1] == b;
+}
+
+constexpr bool match_v4si(__m128i _v, int a, int b, int c, int d) {
+  __v4si v = (__v4si)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
+constexpr bool match_v8hi(__m128i _v, short a, short b, short c, short d, short e, short f, short g, short h) {
+  __v8hi v = (__v8hi)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_v16qi(__m128i _v, char a, char b, char c, char d, char e, char f, char g, char h, char i, char j, char k, char l, char m, char n, char o, char p) {
+  __v16qi v = (__v16qi)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
+constexpr bool match_m256(__m256 v, float a, float b, float c, float d, float e, float f, float g, float h) {
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_m256d(__m256d v, double a, double b, double c, double d) {
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
+constexpr bool match_m256i(__m256i v, unsigned long long a, unsigned long long b, unsigned long long c, unsigned long long d) {
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
+constexpr bool match_m512(__m512 v, float a, float b, float c, float d, float e, float f, float g, float h, float i, float j, float k, float l, float m, float n, float o, float p) {
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] == l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
+constexpr bool match_m512d(__m512d v, double a, double b, double c, double d, double e, double f, double g, double h) {
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_m512i(__m512i v, unsigned long long a, unsigned long long b, unsigned long long c, unsigned long long d, unsigned long long e, unsigned long long f, unsigned long long g, unsigned long long h) {
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] == f && v[6] == g && v[7] == h;
+}
+
+#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
+
+#else
+
+#define TEST_CONSTEXPR(...)
+
+#endif
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
index 944033724a6a2bb..a4d887f0be41646 100644
--- a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm -verify
+// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-- -target-feature +movrs -target-feature +avx10.2-512 -verify
 
 #include <immintrin.h>
 __m512i test_mm512_loadrs_epi8(const __m512i * __A) {
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c
index 68608b0cbff09f7..2a7204e39b8300e 100644
--- a/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-unknown-unknown -target-feature +movrs -target-feature +avx10.2-256 -emit-llvm -verify
+// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-unknown-unknown -target-feature +movrs -target-feature +avx10.2-256 -verify
 
 #include <immintrin.h>
 __m128i test_mm_loadrs_epi8(const __m128i * __A) {
diff --git a/clang/test/CodeGen/X86/sm4-evex-builtins.c b/clang/test/CodeGen/X86/sm4-evex-builtins.c
new file mode 100644
index 000000000000000..0e54bd008d4fb0e
--- /dev/null
+++ b/clang/test/CodeGen/X86/sm4-evex-builtins.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m512i test_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_sm4key4_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_sm4key4_epi32(__A, __B);
+}
+
+__m512i test_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_sm4rnds4_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_sm4rnds4_epi32(__A, __B);
+}
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
index 391e049a6ae3ef0..f779ab07a266408 100644
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -5,6 +5,7 @@
 
 
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
 
@@ -13,6 +14,7 @@ __m128 test_mm_add_ps(__m128 A, __m128 B) {
   // CHECK: fadd <4 x float>
   return _mm_add_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_add_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +4.0f, +4.0f, +5.0f));
 
 __m128 test_mm_add_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_add_ss
@@ -22,12 +24,14 @@ __m128 test_mm_add_ss(__m128 A, __m128 B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_add_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_add_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +9.0f, +0.0f, +2.0f, +4.0f));
 
 __m128 test_mm_and_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_and_ps
   // CHECK: and <4 x i32>
   return _mm_and_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_and_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, -0.0f, +0.0f, +7.0f));
 
 __m128 test_mm_andnot_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_andnot_ps
@@ -35,6 +39,7 @@ __m128 test_mm_andnot_ps(__m128 A, __m128 B) {
   // CHECK: and <4 x i32>
   return _mm_andnot_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_andnot_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), +0.0f, +0.0f, -0.0f, +0.0f));
 
 __m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
   // CHECK-LABEL: test_mm_cmp_ps_eq_oq
@@ -322,6 +327,15 @@ __m128 test_mm_cvtsi32_ss(__m128 A, int B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_cvtsi32_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_cvtsi32_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 42), +42.0f, +0.0f, +2.0f, +4.0f));
+
+__m128 test_mm_cvt_si2ss(__m128 A, int B) {
+  // CHECK-LABEL: test_mm_cvt_si2ss
+  // CHECK: sitofp i32 %{{.*}} to float
+  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+  return _mm_cvt_si2ss(A, B);
+}
+TEST_CONSTEXPR(match_m128(_mm_cvt_si2ss((__m128){+4.0f, +2.0f, +0.0f, +4.0f}, -99), -99.0f, +2.0f, +0.0f, +4.0f));
 
 #ifdef __x86_64__
 __m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
@@ -330,6 +344,7 @@ __m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_cvtsi64_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_cvtsi64_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, 555), +555.0f, +0.0f, +2.0f, +4.0f));
 #endif
 
 float test_mm_cvtss_f32(__m128 A) {
@@ -337,6 +352,7 @@ float test_mm_cvtss_f32(__m128 A) {
   // CHECK: extractelement <4 x float> %{{.*}}, i32 0
   return _mm_cvtss_f32(A);
 }
+TEST_CONSTEXPR(_mm_cvtss_f32((__m128){+8.0f, +4.0f, +2.0f, +1.0f}) == +8.0f);
 
 int test_mm_cvtss_si32(__m128 A) {
   // CHECK-LABEL: test_mm_cvtss_si32
@@ -377,6 +393,7 @@ __m128 test_mm_div_ps(__m128 A, __m128 B) {
   // CHECK: fdiv <4 x float>
   return _mm_div_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_div_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +0.0f, +1.0f, +4.0f));
 
 __m128 test_mm_div_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_div_ss
@@ -386,6 +403,7 @@ __m128 test_mm_div_ss(__m128 A, __m128 B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_div_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_div_ss((__m128){+1.0f, +5.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +0.125f, +5.0f, +2.0f, +4.0f));
 
 unsigned int test_MM_GET_EXCEPTION_MASK(void) {
   // CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
@@ -517,18 +535,21 @@ __m128 test_mm_move_ss(__m128 A, __m128 B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_move_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_move_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));
 
 __m128 test_mm_movehl_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_movehl_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
   return _mm_movehl_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_movehl_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +1.0f, +2.0f, +4.0f));
 
 __m128 test_mm_movelh_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_movelh_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   return _mm_movelh_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_movelh_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +0.0f, +8.0f, +4.0f));
 
 int test_mm_movemask_ps(__m128 A) {
   // CHECK-LABEL: test_mm_movemask_ps
@@ -541,6 +562,7 @@ __m128 test_mm_mul_ps(__m128 A, __m128 B) {
   // CHECK: fmul <4 x float>
   return _mm_mul_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_mul_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +4.0f, +4.0f));
 
 __m128 test_mm_mul_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_mul_ss
@@ -550,12 +572,14 @@ __m128 test_mm_mul_ss(__m128 A, __m128 B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_mul_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_mul_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +8.0f, +0.0f, +2.0f, +4.0f));
 
 __m128 test_mm_or_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_or_ps
   // CHECK: or <4 x i32>
   return _mm_or_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_or_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, -5.0f, -6.0f, +7.0f));
 
 void test_mm_prefetch(char const* p) {
   // CHECK-LABEL: test_mm_prefetch
@@ -628,6 +652,7 @@ __m128 test_mm_set_ps(float A, float B, float C, float D) {
   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
   return _mm_set_ps(A, B, C, D);
 }
+TEST_CONSTEXPR(match_m128(_mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f), +3.0f, +2.0f, +1.0f, +0.0f));
 
 __m128 test_mm_set_ps1(float A) {
   // CHECK-LABEL: test_mm_set_ps1
@@ -637,6 +662,7 @@ __m128 test_mm_set_ps1(float A) {
   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
   return _mm_set_ps1(A);
 }
+TEST_CONSTEXPR(match_m128(_mm_set_ps1(-2.0f), -2.0f, -2.0f, -2.0f, -2.0f));
 
 void test_MM_SET_ROUNDING_MODE(unsigned int A) {
   // CHECK-LABEL: test_MM_SET_ROUNDING_MODE
@@ -657,6 +683,7 @@ __m128 test_mm_set_ss(float A) {
   // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
   return _mm_set_ss(A);
 }
+TEST_CONSTEXPR(match_m128(_mm_set_ss(1.0f), +1.0f, +0.0f, +0.0f, +0.0f));
 
 __m128 test_mm_set1_ps(float A) {
   // CHECK-LABEL: test_mm_set1_ps
@@ -666,6 +693,7 @@ __m128 test_mm_set1_ps(float A) {
   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
   return _mm_set1_ps(A);
 }
+TEST_CONSTEXPR(match_m128(_mm_set1_ps(2.0f), +2.0f, +2.0f, +2.0f, +2.0f));
 
 void test_mm_setcsr(unsigned int A) {
   // CHECK-LABEL: test_mm_setcsr
@@ -682,12 +710,14 @@ __m128 test_mm_setr_ps(float A, float B, float C, float D) {
   // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
   return _mm_setr_ps(A, B, C, D);
 }
+TEST_CONSTEXPR(match_m128(_mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f), +0.0f, +1.0f, +2.0f, +3.0f));
 
 __m128 test_mm_setzero_ps(void) {
   // CHECK-LABEL: test_mm_setzero_ps
   // CHECK: store <4 x float> zeroinitializer
   return _mm_setzero_ps();
 }
+TEST_CONSTEXPR(match_m128(_mm_setzero_ps(), +0.0f, +0.0f, +0.0f, +0.0f));
 
 void test_mm_sfence(void) {
   // CHECK-LABEL: test_mm_sfence
@@ -787,6 +817,7 @@ __m128 test_mm_sub_ps(__m128 A, __m128 B) {
   // CHECK: fsub <4 x float>
   return _mm_sub_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_sub_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, -4.0f, +0.0f, +3.0f));
 
 __m128 test_mm_sub_ss(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_sub_ss
@@ -796,6 +827,7 @@ __m128 test_mm_sub_ss(__m128 A, __m128 B) {
   // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
   return _mm_sub_ss(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_sub_ss((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), -7.0f, +0.0f, +2.0f, +4.0f));
 
 void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
   // CHECK-LABEL: test_MM_TRANSPOSE4_PS
@@ -857,107 +889,18 @@ __m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   return _mm_unpackhi_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_unpackhi_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +2.0f, +2.0f, +4.0f, +1.0f));
 
 __m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_unpacklo_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   return _mm_unpacklo_ps(A, B);
 }
+TEST_CONSTEXPR(match_m128(_mm_unpacklo_ps((__m128){+1.0f, +0.0f, +2.0f, +4.0f}, (__m128){+8.0f, +4.0f, +2.0f, +1.0f}), +1.0f, +8.0f, +0.0f, +4.0f));
 
 __m128 test_mm_xor_ps(__m128 A, __m128 B) {
   // CHECK-LABEL: test_mm_xor_ps
   // CHECK: xor <4 x i32>
   return _mm_xor_ps(A, B);
 }
-
-// Test constexpr handling.
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-
-void test_constexpr() {
-  constexpr __m128 k1 {+1.0f,+0.0f,+2.0f,+4.0f};
-  constexpr __m128 k2 {+8.0f,+4.0f,+2.0f,+1.0f};
-  constexpr __m128 k3 {-4.0f,-5.0f,+6.0f,+7.0f};
-  constexpr __m128 k4 {+0.0f,-0.0f,-0.0f,+0.0f};
-
-  constexpr __m128 v_mm_set_ss = _mm_set_ss(1.0f);
-  static_assert(v_mm_set_ss[0] == +1.0f && v_mm_set_ss[1] == +0.0f && v_mm_set_ss[2] == +0.0f && v_mm_set_ss[3] == +0.0f);
-
-  constexpr __m128 v_mm_set1_ps = _mm_set1_ps(2.0f);
-  static_assert(v_mm_set1_ps[0] == +2.0f && v_mm_set1_ps[1] == +2.0f && v_mm_set1_ps[2] == +2.0f && v_mm_set1_ps[3] == +2.0f);
-
-  constexpr __m128 v_mm_set_ps1 = _mm_set_ps1(-2.0f);
-  static_assert(v_mm_set_ps1[0] == -2.0f && v_mm_set_ps1[1] == -2.0f && v_mm_set_ps1[2] == -2.0f && v_mm_set_ps1[3] == -2.0f);
-
-  constexpr __m128 v_mm_set_ps = _mm_set_ps(+0.0f, +1.0f, +2.0f, +3.0f);
-  static_assert(v_mm_set_ps[0] == +3.0f && v_mm_set_ps[1] == +2.0f && v_mm_set_ps[2] == +1.0f && v_mm_set_ps[3] == +0.0f);
-
-  constexpr __m128 v_mm_setr_ps = _mm_setr_ps(+0.0f, +1.0f, +2.0f, +3.0f);
-  static_assert(v_mm_setr_ps[0] == +0.0f && v_mm_setr_ps[1] == +1.0f && v_mm_setr_ps[2] == +2.0f && v_mm_setr_ps[3] == +3.0f);
-
-  constexpr __m128 v_mm_setzero_ps = _mm_setzero_ps();
-  static_assert(v_mm_setzero_ps[0] == +0.0f && v_mm_setzero_ps[1] == +0.0f && v_mm_setzero_ps[2] == +0.0f && v_mm_setzero_ps[3] == +0.0f);
-
-  constexpr __m128 v_mm_add_ss = _mm_add_ss(k1, k2);
-  static_assert(v_mm_add_ss[0] == +9.0f && v_mm_add_ss[1] == +0.0f && v_mm_add_ss[2] == +2.0f && v_mm_add_ss[3] == +4.0f);
-
-  constexpr __m128 v_mm_add_ps = _mm_add_ps(k1, k2);
-  static_assert(v_mm_add_ps[0] == +9.0f && v_mm_add_ps[1] == +4.0f && v_mm_add_ps[2] == +4.0f && v_mm_add_ps[3] == +5.0f);
-
-  constexpr __m128 v_mm_sub_ss = _mm_sub_ss(k1, k2);
-  static_assert(v_mm_sub_ss[0] == -7.0f && v_mm_sub_ss[1] == +0.0f && v_mm_sub_ss[2] == +2.0f && v_mm_sub_ss[3] == +4.0f);
-
-  constexpr __m128 v_mm_sub_ps = _mm_sub_ps(k1, k2);
-  static_assert(v_mm_sub_ps[0] == -7.0f && v_mm_sub_ps[1] == -4.0f && v_mm_sub_ps[2] == +0.0f && v_mm_sub_ps[3] == +3.0f);
-
-  constexpr __m128 v_mm_mul_ss = _mm_mul_ss(k1, k2);
-  static_assert(v_mm_mul_ss[0] == +8.0f && v_mm_mul_ss[1] == +0.0f && v_mm_mul_ss[2] == +2.0f && v_mm_mul_ss[3] == +4.0f);
-
-  constexpr __m128 v_mm_mul_ps = _mm_mul_ps(k1, k2);
-  static_assert(v_mm_mul_ps[0] == +8.0f && v_mm_mul_ps[1] == +0.0f && v_mm_mul_ps[2] == +4.0f && v_mm_mul_ps[3] == +4.0f);
-
-  constexpr __m128 v_mm_div_ss = _mm_div_ss(k1, k2);
-  static_assert(v_mm_div_ss[0] == +0.125f && v_mm_div_ss[1] == +0.0f && v_mm_div_ss[2] == +2.0f && v_mm_div_ss[3] == +4.0f);
-
-  constexpr __m128 v_mm_div_ps = _mm_div_ps(k1, k2);
-  static_assert(v_mm_div_ps[0] == +0.125f && v_mm_div_ps[1] == +0.0f && v_mm_div_ps[2] == +1.0f && v_mm_div_ps[3] == +4.0f);
-
-  constexpr __m128 v_mm_and_ps = _mm_and_ps(k3, k4);
-  static_assert(v_mm_and_ps[0] == +0.0f && v_mm_and_ps[1] == +0.0f && v_mm_and_ps[2] == +0.0f && v_mm_and_ps[3] == +0.0f);
-
-  constexpr __m128 v_mm_andnot_ps = _mm_andnot_ps(k3, k4);
-  static_assert(v_mm_andnot_ps[0] == +0.0f && v_mm_andnot_ps[1] == +0.0f && v_mm_andnot_ps[2] == +0.0f && v_mm_andnot_ps[3] == +0.0f);
-
-  constexpr __m128 v_mm_or_ps = _mm_or_ps(k3, k4);
-  static_assert(v_mm_or_ps[0] == -4.0f && v_mm_or_ps[1] == -5.0f && v_mm_or_ps[2] == -6.0f && v_mm_or_ps[3] == +7.0f);
-
-  constexpr __m128 v_mm_xor_ps = _mm_xor_ps(k3, k4);
-  static_assert(v_mm_xor_ps[0] == -4.0f && v_mm_xor_ps[1] == +5.0f && v_mm_xor_ps[2] == -6.0f && v_mm_xor_ps[3] == +7.0f);
-
-  constexpr __m128 v_mm_unpackhi_ps = _mm_unpackhi_ps(k1, k2);
-  static_assert(v_mm_unpackhi_ps[0] == +2.0f && v_mm_unpackhi_ps[1] == +2.0f && v_mm_unpackhi_ps[2] == +4.0f && v_mm_unpackhi_ps[3] == +1.0f);
-
-  constexpr __m128 v_mm_unpacklo_ps = _mm_unpacklo_ps(k1, k2);
-  static_assert(v_mm_unpacklo_ps[0] == +1.0f && v_mm_unpacklo_ps[1] == +8.0f && v_mm_unpacklo_ps[2] == +0.0f && v_mm_unpacklo_ps[3] == +4.0f);
-
-  constexpr __m128 v_mm_move_ss = _mm_move_ss(k1, k2);
-  static_assert(v_mm_move_ss[0] == +8.0f && v_mm_move_ss[1] == +0.0f && v_mm_move_ss[2] == +2.0f && v_mm_move_ss[3] == +4.0f);
-
-  constexpr __m128 v_mm_movehl_ps = _mm_movehl_ps(k1, k2);
-  static_assert(v_mm_movehl_ps[0] == +2.0f && v_mm_movehl_ps[1] == +1.0f && v_mm_movehl_ps[2] == +2.0f && v_mm_movehl_ps[3] == +4.0f);
-
-  constexpr __m128 v_mm_movelh_ps = _mm_movelh_ps(k1, k2);
-  static_assert(v_mm_movelh_ps[0] == +1.0f && v_mm_movelh_ps[1] == +0.0f && v_mm_movelh_ps[2] == +8.0f && v_mm_movelh_ps[3] == +4.0f);
-
-  constexpr __m128 v_mm_cvtsi32_ss = _mm_cvtsi32_ss(k1, 42);
-  static_assert(v_mm_cvtsi32_ss[0] == 42.0f && v_mm_cvtsi32_ss[1] == +0.0f && v_mm_cvtsi32_ss[2] == +2.0f && v_mm_cvtsi32_ss[3] == +4.0f);
-
-  constexpr __m128 v_mm_cvt_si2ss = _mm_cvt_si2ss(k2, -99);
-  static_assert(v_mm_cvt_si2ss[0] == -99.0f && v_mm_cvt_si2ss[1] == +4.0f && v_mm_cvt_si2ss[2] == +2.0f && v_mm_cvt_si2ss[3] == +1.0f);
-
-  constexpr __m128 v_mm_cvtsi64_ss = _mm_cvtsi64_ss(k3, 555);
-  static_assert(v_mm_cvtsi64_ss[0] == 555.0f && v_mm_cvtsi64_ss[1] == -5.0f && v_mm_cvtsi64_ss[2] == +6.0f && v_mm_cvtsi64_ss[3] == +7.0f);
-
-  static_assert(_mm_cvtss_f32(k2) == +8.0f);
-}
-
-#endif
\ No newline at end of file
+TEST_CONSTEXPR(match_m128(_mm_xor_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m128){+0.0f, -0.0f, -0.0f, +7.0f}), -4.0f, +5.0f, -6.0f, +0.0f));
diff --git a/clang/test/CodeGen/X86/sse.c b/clang/test/CodeGen/X86/sse.c
index a75b8dc77e86e14..017bdd7846fa396 100644
--- a/clang/test/CodeGen/X86/sse.c
+++ b/clang/test/CodeGen/X86/sse.c
@@ -1,42 +1,72 @@
-// RUN: %clang_cc1 -ffreestanding -O3 -triple x86_64-apple-macosx10.8.0 -target-feature +sse4.1 -emit-llvm %s -o - | FileCheck %s
-// FIXME: This test currently depends on optimization - it should be rewritten to avoid it.
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -ffreestanding -triple x86_64-- -target-feature +sse4.1 -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
 
 
 #include <emmintrin.h>
 
 // Byte-shifts look reversed due to xmm register layout
+// CHECK-LABEL: define dso_local <2 x i64> @test_mm_slli_si128(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT:    [[PSLLDQ:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[CAST]], <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
+// CHECK-NEXT:    [[CAST1:%.*]] = bitcast <16 x i8> [[PSLLDQ]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[CAST1]]
+//
 __m128i test_mm_slli_si128(__m128i a) {
-  // CHECK-LABEL: @test_mm_slli_si128
-  // CHECK: shufflevector <16 x i8> <{{.*}}, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> {{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
   return _mm_slli_si128(a, 5);
 }
 
+// CHECK-LABEL: define dso_local <2 x i64> @test_mm_slli_si128_0(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT:    [[PSLLDQ:%.*]] = shufflevector <16 x i8> zeroinitializer, <16 x i8> [[CAST]], <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+// CHECK-NEXT:    [[CAST1:%.*]] = bitcast <16 x i8> [[PSLLDQ]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[CAST1]]
+//
 __m128i test_mm_slli_si128_0(__m128i a) {
-  // CHECK-LABEL: @test_mm_slli_si128_0
-  // CHECK-NOT: shufflevector
   return _mm_slli_si128(a, 0);
 }
 
+// CHECK-LABEL: define dso_local <2 x i64> @test_mm_slli_si128_16(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret <2 x i64> zeroinitializer
+//
 __m128i test_mm_slli_si128_16(__m128i a) {
-  // CHECK-LABEL: @test_mm_slli_si128_16
-  // CHECK-NOT: shufflevector
   return _mm_slli_si128(a, 16);
 }
 
+// CHECK-LABEL: define dso_local <2 x i64> @test_mm_srli_si128(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT:    [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
+// CHECK-NEXT:    [[CAST1:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[CAST1]]
+//
 __m128i test_mm_srli_si128(__m128i a) {
-  // CHECK-LABEL: @test_mm_srli_si128
-  // CHECK: shufflevector <16 x i8> {{.*}}, <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, {{.*}}>, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
   return _mm_srli_si128(a, 5);
 }
 
+// CHECK-LABEL: define dso_local <2 x i64> @test_mm_srli_si128_0(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
+// CHECK-NEXT:    [[PSRLDQ:%.*]] = shufflevector <16 x i8> [[CAST]], <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+// CHECK-NEXT:    [[CAST1:%.*]] = bitcast <16 x i8> [[PSRLDQ]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[CAST1]]
+//
 __m128i test_mm_srli_si128_0(__m128i a) {
-  // CHECK-LABEL: @test_mm_srli_si128_0
-  // CHECK-NOT: shufflevector
   return _mm_srli_si128(a, 0);
 }
 
+// CHECK-LABEL: define dso_local <2 x i64> @test_mm_srli_si128_16(
+// CHECK-SAME: <2 x i64> noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret <2 x i64> zeroinitializer
+//
 __m128i test_mm_srli_si128_16(__m128i a) {
-  // CHECK-LABEL: @test_mm_srli_si128_16
-  // CHECK-NOT: shufflevector
   return _mm_srli_si128(a, 16);
 }
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 0603ca5f78b6a12..c4493a49120543a 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -11,6 +11,7 @@
 
 
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
 
@@ -31,18 +32,21 @@ __m128i test_mm_add_epi32(__m128i A, __m128i B) {
   // CHECK: add <4 x i32>
   return _mm_add_epi32(A, B);
 }
+TEST_CONSTEXPR(match_v4si(_mm_add_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), -9, +6, +9, -8));
 
 __m128i test_mm_add_epi64(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_add_epi64
   // CHECK: add <2 x i64>
   return _mm_add_epi64(A, B);
 }
+TEST_CONSTEXPR(match_v2di(_mm_add_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), -4, +5));
 
 __m128d test_mm_add_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_add_pd
   // CHECK: fadd <2 x double>
   return _mm_add_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_add_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +6.0, -8.0));
 
 __m128d test_mm_add_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_add_sd
@@ -52,6 +56,7 @@ __m128d test_mm_add_sd(__m128d A, __m128d B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_add_sd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_add_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +6.0, -3.0));
 
 __m128i test_mm_adds_epi8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_adds_epi8
@@ -84,6 +89,7 @@ __m128d test_mm_and_pd(__m128d A, __m128d B) {
   // CHECK: and <2 x i64>
   return _mm_and_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_and_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +0.0, -0.0));
 
 __m128i test_mm_and_si128(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_and_si128
@@ -97,6 +103,7 @@ __m128d test_mm_andnot_pd(__m128d A, __m128d B) {
   // CHECK: and <2 x i64>
   return _mm_andnot_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_andnot_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +0.0, +0.0)); // (~A) & B: ~(-3.0) has a clear sign bit, so lane 1 is +0.0
 
 __m128i test_mm_andnot_si128(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_andnot_si128
@@ -133,11 +140,13 @@ __m128 test_mm_castpd_ps(__m128d A) {
   // CHECK-LABEL: test_mm_castpd_ps
   return _mm_castpd_ps(A);
 }
+TEST_CONSTEXPR(match_m128(_mm_castpd_ps((__m128d){-1.0, +2.0}), +0.0f, -1.875f, +0.0f, +2.0f));
 
 __m128i test_mm_castpd_si128(__m128d A) {
   // CHECK-LABEL: test_mm_castpd_si128
   return _mm_castpd_si128(A);
 }
+TEST_CONSTEXPR(match_m128i(_mm_castpd_si128((__m128d){-1.0, +2.0}), 0xBFF0000000000000ULL, 0x4000000000000000ULL));
 
 __m128d test_mm_castps_pd(__m128 A) {
   // CHECK-LABEL: test_mm_castps_pd
@@ -499,12 +508,14 @@ __m128d test_mm_cvtepi32_pd(__m128i A) {
   // CHECK: sitofp <2 x i32> %{{.*}} to <2 x double>
   return _mm_cvtepi32_pd(A);
 }
+TEST_CONSTEXPR(match_m128d(_mm_cvtepi32_pd((__m128i)(__v4si){-9, +8, -6, 0}), -9.0, +8.0));
 
 __m128 test_mm_cvtepi32_ps(__m128i A) {
   // CHECK-LABEL: test_mm_cvtepi32_ps
   // CHECK: sitofp <4 x i32> %{{.*}} to <4 x float>
   return _mm_cvtepi32_ps(A);
 }
+TEST_CONSTEXPR(match_m128(_mm_cvtepi32_ps((__m128i)(__v4si){-3, +2, -1, 0}), -3.0f, +2.0f, -1.0f, +0.0f));
 
 __m128i test_mm_cvtpd_epi32(__m128d A) {
   // CHECK-LABEL: test_mm_cvtpd_epi32
@@ -530,12 +541,14 @@ __m128d test_mm_cvtps_pd(__m128 A) {
   // CHECK: fpext <2 x float> %{{.*}} to <2 x double>
   return _mm_cvtps_pd(A);
 }
+TEST_CONSTEXPR(match_m128d(_mm_cvtps_pd((__m128){-1.0f, +2.0f, -3.0f, +4.0f}), -1.0, +2.0));
 
 double test_mm_cvtsd_f64(__m128d A) {
   // CHECK-LABEL: test_mm_cvtsd_f64
   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
   return _mm_cvtsd_f64(A);
 }
+TEST_CONSTEXPR(_mm_cvtsd_f64((__m128d){-4.0, +8.0}) == -4.0);
 
 int test_mm_cvtsd_si32(__m128d A) {
   // CHECK-LABEL: test_mm_cvtsd_si32
@@ -575,6 +588,7 @@ __m128d test_mm_cvtsi32_sd(__m128d A, int B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_cvtsi32_sd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_cvtsi32_sd((__m128d){-99.0, +42.0}, 55), +55.0, +42.0));
 
 __m128i test_mm_cvtsi32_si128(int A) {
   // CHECK-LABEL: test_mm_cvtsi32_si128
@@ -608,6 +622,7 @@ __m128d test_mm_cvtss_sd(__m128d A, __m128 B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_cvtss_sd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_cvtss_sd((__m128d){+32.0, +8.0}, (__m128){-1.0f, +2.0f, -3.0f, +4.0f}), -1.0, +8.0));
 
 __m128i test_mm_cvttpd_epi32(__m128d A) {
   // CHECK-LABEL: test_mm_cvttpd_epi32
@@ -640,6 +655,7 @@ __m128d test_mm_div_pd(__m128d A, __m128d B) {
   // CHECK: fdiv <2 x double>
   return _mm_div_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_div_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -0.5, +4.0));
 
 __m128d test_mm_div_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_div_sd
@@ -649,6 +665,7 @@ __m128d test_mm_div_sd(__m128d A, __m128d B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_div_sd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_div_sd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -0.5, +8.0));
 
 // Lowering to pextrw requires optimization.
 int test_mm_extract_epi16(__m128i A) {
@@ -873,6 +890,7 @@ __m128d test_mm_move_sd(__m128d A, __m128d B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_move_sd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_move_sd((__m128d){+2.0, +8.0}, (__m128d){-4.0, +2.0}), -4.0, +8.0));
 
 int test_mm_movemask_epi8(__m128i A) {
   // CHECK-LABEL: test_mm_movemask_epi8
@@ -899,6 +917,7 @@ __m128d test_mm_mul_pd(__m128d A, __m128d B) {
   // CHECK: fmul <2 x double> %{{.*}}, %{{.*}}
   return _mm_mul_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_mul_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +5.0, +15.0));
 
 __m128d test_mm_mul_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_mul_sd
@@ -908,6 +927,7 @@ __m128d test_mm_mul_sd(__m128d A, __m128d B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_mul_sd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_mul_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), +5.0, -3.0));
 
 __m128i test_mm_mulhi_epi16(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_mulhi_epi16
@@ -932,6 +952,7 @@ __m128d test_mm_or_pd(__m128d A, __m128d B) {
   // CHECK: or <2 x i64> %{{.*}}, %{{.*}}
   return _mm_or_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_or_pd((__m128d){+1.0, -3.0}, (__m128d){-0.0, +0.0}), -1.0, -3.0));
 
 __m128i test_mm_or_si128(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_or_si128
@@ -992,6 +1013,7 @@ __m128i test_mm_set_epi8(char A, char B, char C, char D,
   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
   return _mm_set_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
 }
+TEST_CONSTEXPR(match_v16qi(_mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
 
 __m128i test_mm_set_epi16(short A, short B, short C, short D,
                           short E, short F, short G, short H) {
@@ -1006,6 +1028,7 @@ __m128i test_mm_set_epi16(short A, short B, short C, short D,
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
   return _mm_set_epi16(A, B, C, D, E, F, G, H);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_set_epi16(0, -1, -2, -3, -4, -5, -6, -7), -7, -6, -5, -4, -3, -2, -1, 0));
 
 __m128i test_mm_set_epi32(int A, int B, int C, int D) {
   // CHECK-LABEL: test_mm_set_epi32
@@ -1015,6 +1038,7 @@ __m128i test_mm_set_epi32(int A, int B, int C, int D) {
   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
   return _mm_set_epi32(A, B, C, D);
 }
+TEST_CONSTEXPR(match_v4si(_mm_set_epi32(1, -3, 5, -7), -7, 5, -3, 1));
 
 __m128i test_mm_set_epi64(__m64 A, __m64 B) {
   // CHECK-LABEL: test_mm_set_epi64
@@ -1022,6 +1046,7 @@ __m128i test_mm_set_epi64(__m64 A, __m64 B) {
   // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
   return _mm_set_epi64(A, B);
 }
+TEST_CONSTEXPR(match_v2di(_mm_set_epi64((__m64){-1}, (__m64){42}), 42, -1));
 
 __m128i test_mm_set_epi64x(long long A, long long B) {
   // CHECK-LABEL: test_mm_set_epi64x
@@ -1029,6 +1054,7 @@ __m128i test_mm_set_epi64x(long long A, long long B) {
   // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
   return _mm_set_epi64x(A, B);
 }
+TEST_CONSTEXPR(match_v2di(_mm_set_epi64x(100, -1000), -1000, 100));
 
 __m128d test_mm_set_pd(double A, double B) {
   // CHECK-LABEL: test_mm_set_pd
@@ -1036,6 +1062,7 @@ __m128d test_mm_set_pd(double A, double B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
   return _mm_set_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_set_pd(-9.0, +3.0), +3.0, -9.0));
 
 __m128d test_mm_set_pd1(double A) {
   // CHECK-LABEL: test_mm_set_pd1
@@ -1043,6 +1070,7 @@ __m128d test_mm_set_pd1(double A) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
   return _mm_set_pd1(A);
 }
+TEST_CONSTEXPR(match_m128d(_mm_set_pd1(+5.0), +5.0, +5.0));
 
 __m128d test_mm_set_sd(double A) {
   // CHECK-LABEL: test_mm_set_sd
@@ -1050,6 +1078,7 @@ __m128d test_mm_set_sd(double A) {
   // CHECK: insertelement <2 x double> %{{.*}}, double 0.000000e+00, i32 1
   return _mm_set_sd(A);
 }
+TEST_CONSTEXPR(match_m128d(_mm_set_sd(+1.0), +1.0, +0.0));
 
 __m128i test_mm_set1_epi8(char A) {
   // CHECK-LABEL: test_mm_set1_epi8
@@ -1071,6 +1100,7 @@ __m128i test_mm_set1_epi8(char A) {
   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
   return _mm_set1_epi8(A);
 }
+TEST_CONSTEXPR(match_v16qi(_mm_set1_epi8(99), 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99));
 
 __m128i test_mm_set1_epi16(short A) {
   // CHECK-LABEL: test_mm_set1_epi16
@@ -1084,6 +1114,7 @@ __m128i test_mm_set1_epi16(short A) {
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
   return _mm_set1_epi16(A);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_set1_epi16(-128), -128, -128, -128, -128, -128, -128, -128, -128));
 
 __m128i test_mm_set1_epi32(int A) {
   // CHECK-LABEL: test_mm_set1_epi32
@@ -1093,6 +1124,7 @@ __m128i test_mm_set1_epi32(int A) {
   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
   return _mm_set1_epi32(A);
 }
+TEST_CONSTEXPR(match_v4si(_mm_set1_epi32(55), 55, 55, 55, 55));
 
 __m128i test_mm_set1_epi64(__m64 A) {
   // CHECK-LABEL: test_mm_set1_epi64
@@ -1100,6 +1132,7 @@ __m128i test_mm_set1_epi64(__m64 A) {
   // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
   return _mm_set1_epi64(A);
 }
+TEST_CONSTEXPR(match_v2di(_mm_set1_epi64((__m64){-65535}), -65535, -65535));
 
 __m128i test_mm_set1_epi64x(long long A) {
   // CHECK-LABEL: test_mm_set1_epi64x
@@ -1107,6 +1140,7 @@ __m128i test_mm_set1_epi64x(long long A) {
   // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
   return _mm_set1_epi64x(A);
 }
+TEST_CONSTEXPR(match_v2di(_mm_set1_epi64x(65536), 65536, 65536));
 
 __m128d test_mm_set1_pd(double A) {
   // CHECK-LABEL: test_mm_set1_pd
@@ -1114,6 +1148,7 @@ __m128d test_mm_set1_pd(double A) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
   return _mm_set1_pd(A);
 }
+TEST_CONSTEXPR(match_m128d(_mm_set1_pd(-42.0), -42.0, -42.0));
 
 __m128i test_mm_setr_epi8(char A, char B, char C, char D,
                           char E, char F, char G, char H,
@@ -1138,6 +1173,7 @@ __m128i test_mm_setr_epi8(char A, char B, char C, char D,
   // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 15
   return _mm_setr_epi8(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P);
 }
+TEST_CONSTEXPR(match_v16qi(_mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
 
 __m128i test_mm_setr_epi16(short A, short B, short C, short D,
                            short E, short F, short G, short H) {
@@ -1152,6 +1188,7 @@ __m128i test_mm_setr_epi16(short A, short B, short C, short D,
   // CHECK: insertelement <8 x i16> %{{.*}}, i16 %{{.*}}, i32 7
   return _mm_setr_epi16(A, B, C, D, E, F, G, H);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_setr_epi16(0, -1, -2, -3, -4, -5, -6, -7), 0, -1, -2, -3, -4, -5, -6, -7));
 
 __m128i test_mm_setr_epi32(int A, int B, int C, int D) {
   // CHECK-LABEL: test_mm_setr_epi32
@@ -1161,6 +1198,7 @@ __m128i test_mm_setr_epi32(int A, int B, int C, int D) {
   // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 3
   return _mm_setr_epi32(A, B, C, D);
 }
+TEST_CONSTEXPR(match_v4si(_mm_setr_epi32(1, -3, 5, -7), 1, -3, 5, -7));
 
 __m128i test_mm_setr_epi64(__m64 A, __m64 B) {
   // CHECK-LABEL: test_mm_setr_epi64
@@ -1168,6 +1206,7 @@ __m128i test_mm_setr_epi64(__m64 A, __m64 B) {
   // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1
   return _mm_setr_epi64(A, B);
 }
+TEST_CONSTEXPR(match_v2di(_mm_setr_epi64((__m64){-1}, (__m64){42}), -1, 42));
 
 __m128d test_mm_setr_pd(double A, double B) {
   // CHECK-LABEL: test_mm_setr_pd
@@ -1175,18 +1214,21 @@ __m128d test_mm_setr_pd(double A, double B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 1
   return _mm_setr_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_setr_pd(-9.0, +3.0), -9.0, +3.0));
 
 __m128d test_mm_setzero_pd(void) {
   // CHECK-LABEL: test_mm_setzero_pd
   // CHECK: store <2 x double> zeroinitializer
   return _mm_setzero_pd();
 }
+TEST_CONSTEXPR(match_m128d(_mm_setzero_pd(), +0.0, +0.0));
 
 __m128i test_mm_setzero_si128(void) {
   // CHECK-LABEL: test_mm_setzero_si128
   // CHECK: store <2 x i64> zeroinitializer
   return _mm_setzero_si128();
 }
+TEST_CONSTEXPR(match_m128i(_mm_setzero_si128(), 0, 0));
 
 __m128i test_mm_shuffle_epi32(__m128i A) {
   // CHECK-LABEL: test_mm_shuffle_epi32
@@ -1608,18 +1650,21 @@ __m128i test_mm_sub_epi32(__m128i A, __m128i B) {
   // CHECK: sub <4 x i32>
   return _mm_sub_epi32(A, B);
 }
+TEST_CONSTEXPR(match_v4si(_mm_sub_epi32((__m128i)(__v4si){+1, -2, +3, -4}, (__m128i)(__v4si){-10, +8, +6, -4}), +11, -10, -3, 0));
 
 __m128i test_mm_sub_epi64(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_sub_epi64
   // CHECK: sub <2 x i64>
   return _mm_sub_epi64(A, B);
 }
+TEST_CONSTEXPR(match_v2di(_mm_sub_epi64((__m128i)(__v2di){+5, -3}, (__m128i)(__v2di){-9, +8}), +14, -11));
 
 __m128d test_mm_sub_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_sub_pd
   // CHECK: fsub <2 x double>
   return _mm_sub_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_sub_pd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), -4.0, +2.0));
 
 __m128d test_mm_sub_sd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_sub_sd
@@ -1629,6 +1674,7 @@ __m128d test_mm_sub_sd(__m128d A, __m128d B) {
   // CHECK: insertelement <2 x double> %{{.*}}, double %{{.*}}, i32 0
   return _mm_sub_sd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_sub_sd((__m128d){+1.0, -3.0}, (__m128d){+5.0, -5.0}), -4.0, -3.0));
 
 __m128i test_mm_subs_epi8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_subs_epi8
@@ -1736,6 +1782,7 @@ __m128d test_mm_unpackhi_pd(__m128d A, __m128d B) {
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 1, i32 3>
   return _mm_unpackhi_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_unpackhi_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, -2.0}), +8.0, -2.0));
 
 __m128i test_mm_unpacklo_epi8(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_unpacklo_epi8
@@ -1766,123 +1813,17 @@ __m128d test_mm_unpacklo_pd(__m128d A, __m128d B) {
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> <i32 0, i32 2>
   return _mm_unpacklo_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_unpacklo_pd((__m128d){+2.0, +8.0}, (__m128d){-4.0, -2.0}), +2.0, -4.0));
 
 __m128d test_mm_xor_pd(__m128d A, __m128d B) {
   // CHECK-LABEL: test_mm_xor_pd
   // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
   return _mm_xor_pd(A, B);
 }
+TEST_CONSTEXPR(match_m128d(_mm_xor_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0, -0.0}), +1.0, +3.0));
 
 __m128i test_mm_xor_si128(__m128i A, __m128i B) {
   // CHECK-LABEL: test_mm_xor_si128
   // CHECK: xor <2 x i64> %{{.*}}, %{{.*}}
   return _mm_xor_si128(A, B);
 }
-
-// Test constexpr handling.
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-
-void test_constexpr() {
-  constexpr __m128d kd1 {+2.0,-1.0};
-  constexpr __m128d kd2 {-4.0,-2.0};
-  constexpr __m128d kd3 {-0.0,+0.0};
-
-  constexpr __m128 kf1 {-1.0f,+2.0f,-3.0f,+4.0f};
-
-  constexpr __m64 km1 {0x00000080FFFFFFF0ULL}; // -16,+128
-  constexpr __m128i ki1 {0x00000010FFFFFFF8ULL, 0x00000001FFFFFFFFULL}; // -8,+16,-1,1
-
-  constexpr __m128d v_mm_set_sd = _mm_set_sd(1.0);
-  static_assert(v_mm_set_sd[0] == +1.0 && v_mm_set_sd[1] == +0.0);
-
-  constexpr __m128d v_mm_set1_pd = _mm_set1_pd(2.0);
-  static_assert(v_mm_set1_pd[0] == +2.0 && v_mm_set1_pd[1] == +2.0);
-
-  constexpr __m128d v_mm_set_pd1 = _mm_set_pd1(-2.0);
-  static_assert(v_mm_set_pd1[0] == -2.0 && v_mm_set_pd1[1] == -2.0);
-
-  constexpr __m128d v_mm_set_pd = _mm_set_pd(+2.0, +3.0);
-  static_assert(v_mm_set_pd[0] == +3.0 && v_mm_set_pd[1] == +2.0);
-
-  constexpr __m128d v_mm_setr_pd = _mm_setr_pd(+2.0, +3.0);
-  static_assert(v_mm_setr_pd[0] == +2.0 && v_mm_setr_pd[1] == +3.0);
-
-  constexpr __m128d v_mm_setzero_pd = _mm_setzero_pd();
-  static_assert(v_mm_setzero_pd[0] == +0.0 && v_mm_setzero_pd[1] == +0.0);
-
-  constexpr __m128i v_mm_setzero_si128 = _mm_setzero_si128();
-  static_assert(v_mm_setzero_si128[0] == 0x0000000000000000ULL && v_mm_setzero_si128[1] == 0x0000000000000000ULL);
-
-  constexpr __m128d v_mm_add_sd = _mm_add_sd(kd1, kd2);
-  static_assert(v_mm_add_sd[0] == -2.0 && v_mm_add_sd[1] == -1.0);
-
-  constexpr __m128d v_mm_add_pd = _mm_add_pd(kd1, kd2);
-  static_assert(v_mm_add_pd[0] == -2.0 && v_mm_add_pd[1] == -3.0);
-
-  constexpr __m128d v_mm_sub_sd = _mm_sub_sd(kd1, kd2);
-  static_assert(v_mm_sub_sd[0] == +6.0 && v_mm_sub_sd[1] == -1.0);
-
-  constexpr __m128d v_mm_sub_pd = _mm_sub_pd(kd1, kd2);
-  static_assert(v_mm_sub_pd[0] == +6.0 && v_mm_sub_pd[1] == +1.0);
-
-  constexpr __m128d v_mm_mul_sd = _mm_mul_sd(kd1, kd2);
-  static_assert(v_mm_mul_sd[0] == -8.0 && v_mm_mul_sd[1] == -1.0);
-
-  constexpr __m128d v_mm_mul_pd = _mm_mul_pd(kd1, kd2);
-  static_assert(v_mm_mul_pd[0] == -8.0 && v_mm_mul_pd[1] == +2.0);
-
-  constexpr __m128d v_mm_div_sd = _mm_div_sd(kd1, kd2);
-  static_assert(v_mm_div_sd[0] == -0.5 && v_mm_div_sd[1] == -1.0);
-
-  constexpr __m128d v_mm_div_pd = _mm_div_pd(kd1, kd2);
-  static_assert(v_mm_div_pd[0] == -0.5 && v_mm_div_pd[1] == +0.5);
-
-  constexpr __m128d v_mm_and_pd = _mm_and_pd(kd1, kd3);
-  static_assert(v_mm_and_pd[0] == +0.0 && v_mm_and_pd[1] == +0.0);
-
-  constexpr __m128d v_mm_andnot_pd = _mm_andnot_pd(kd1, kd3);
-  static_assert(v_mm_andnot_pd[0] == -0.0 && v_mm_andnot_pd[1] == +0.0);
-
-  constexpr __m128d v_mm_or_pd = _mm_or_pd(kd1, kd3);
-  static_assert(v_mm_or_pd[0] == -2.0 && v_mm_or_pd[1] == -1.0);
-
-  constexpr __m128d v_mm_xor_pd = _mm_xor_pd(kd2, kd3);
-  static_assert(v_mm_xor_pd[0] == +4.0 && v_mm_xor_pd[1] == -2.0);
-
-  constexpr __m128d v_mm_cvtps_pd = _mm_cvtps_pd(kf1);
-  static_assert(v_mm_cvtps_pd[0] == -1.0 && v_mm_cvtps_pd[1] == +2.0);
-
-  constexpr __m128d v_mm_cvtepi32_pd = _mm_cvtepi32_pd(ki1);
-  static_assert(v_mm_cvtepi32_pd[0] == -8.0 && v_mm_cvtepi32_pd[1] == +16.0);
-
-  constexpr __m128 v_mm_cvtepi32_ps = _mm_cvtepi32_ps(ki1);
-  static_assert(v_mm_cvtepi32_ps[0] == -8.0f && v_mm_cvtepi32_ps[1] == +16.0f && v_mm_cvtepi32_ps[2] == -1.0f && v_mm_cvtepi32_ps[3] == +1.0f);
-
-  constexpr __m128d v_mm_cvtsi32_sd = _mm_cvtsi32_sd(kd1, 8);
-  static_assert(v_mm_cvtsi32_sd[0] == +8.0 && v_mm_cvtsi32_sd[1] == -1.0);
-
-  constexpr __m128d v_mm_cvtss_sd = _mm_cvtss_sd(kd2, kf1);
-  static_assert(v_mm_cvtss_sd[0] == -1.0 && v_mm_cvtss_sd[1] == -2.0);
-
-  constexpr __m128d v_mm_cvtpi32_pd = _mm_cvtpi32_pd(km1);
-  static_assert(v_mm_cvtpi32_pd[0] == -16.0 && v_mm_cvtpi32_pd[1] == 128.0);
-
-  static_assert(_mm_cvtsd_f64(kd2) == -4.0);
-
-  constexpr __m128d v_mm_move_sd = _mm_move_sd(kd1, kd2);
-  static_assert(v_mm_move_sd[0] == -4.0 && v_mm_move_sd[1] == -1.0);
-
-  constexpr __m128d v_mm_unpackhi_pd = _mm_unpackhi_pd(kd1, kd2);
-  static_assert(v_mm_unpackhi_pd[0] == -1.0f && v_mm_unpackhi_pd[1] == -2.0f);
-
-  constexpr __m128d v_mm_unpacklo_pd = _mm_unpacklo_pd(kd1, kd2);
-  static_assert(v_mm_unpacklo_pd[0] == +2.0f && v_mm_unpacklo_pd[1] == -4.0f);
-
-  constexpr __m128 v_mm_castpd_ps = _mm_castpd_ps(kd3);
-  static_assert(v_mm_castpd_ps[0] == -0.0f && v_mm_castpd_ps[1] == +0.0f && v_mm_castpd_ps[2] == +0.0f && v_mm_castpd_ps[3] == +0.0f);
-
-  constexpr __m128i v_mm_castpd_si128 = _mm_castpd_si128(kd3);
-  static_assert(v_mm_castpd_si128[0] == 0x8000000000000000ULL && v_mm_castpd_si128[1] == 0x0000000000000000ULL);
-}
-
-#endif
diff --git a/clang/test/CodeGen/X86/sse3-builtins.c b/clang/test/CodeGen/X86/sse3-builtins.c
index 18c062f4c14a7db..d47c19b882cd1ef 100644
--- a/clang/test/CodeGen/X86/sse3-builtins.c
+++ b/clang/test/CodeGen/X86/sse3-builtins.c
@@ -5,6 +5,7 @@
 
 
 #include <immintrin.h>
+#include "builtin_test_helpers.h"
 
 // NOTE: This should match the tests in llvm/test/CodeGen/X86/sse3-intrinsics-fast-isel.ll
 
@@ -63,34 +64,18 @@ __m128d test_mm_movedup_pd(__m128d A) {
   // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
   return _mm_movedup_pd(A);
 }
+TEST_CONSTEXPR(match_m128d(_mm_movedup_pd((__m128d){+7.0, -7.0}), +7.0, +7.0));
 
 __m128 test_mm_movehdup_ps(__m128 A) {
   // CHECK-LABEL: test_mm_movehdup_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
   return _mm_movehdup_ps(A);
 }
+TEST_CONSTEXPR(match_m128(_mm_movehdup_ps((__m128){+1.0f,-1.0f,+2.0f,+4.0f}), -1.0f, -1.0f, +4.0f, +4.0f));
 
 __m128 test_mm_moveldup_ps(__m128 A) {
   // CHECK-LABEL: test_mm_moveldup_ps
   // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
   return _mm_moveldup_ps(A);
 }
-
-// Test constexpr handling.
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-
-void test_constexpr() {
-  constexpr __m128d kd1 {+7.0,-7.0};
-  constexpr __m128 kf1 {+1.0f,-1.0f,+2.0f,+4.0f};
-
-  constexpr __m128d v_mm_movedup_pd = _mm_movedup_pd(kd1);
-  static_assert(v_mm_movedup_pd[0] == +7.0 && v_mm_movedup_pd[1] == +7.0);
-
-  constexpr __m128 v_mm_movehdup_ps = _mm_movehdup_ps(kf1);
-  static_assert(v_mm_movehdup_ps[0] == -1.0f && v_mm_movehdup_ps[1] == -1.0f && v_mm_movehdup_ps[2] == +4.0f && v_mm_movehdup_ps[3] == +4.0f);
-
-  constexpr __m128 v_mm_moveldup_ps = _mm_moveldup_ps(kf1);
-  static_assert(v_mm_moveldup_ps[0] == +1.0f && v_mm_moveldup_ps[1] == +1.0f && v_mm_moveldup_ps[2] == +2.0f && v_mm_moveldup_ps[3] == +2.0f);
-}
-
-#endif
+TEST_CONSTEXPR(match_m128(_mm_moveldup_ps((__m128){+1.0f,-1.0f,+2.0f,+4.0f}), +1.0f, +1.0f, +2.0f, +2.0f));
diff --git a/clang/test/CodeGen/aarch64-cpu-supports-target.c b/clang/test/CodeGen/aarch64-cpu-supports-target.c
index 5186cab92a921d7..e3a75e9a1fc7d39 100644
--- a/clang/test/CodeGen/aarch64-cpu-supports-target.c
+++ b/clang/test/CodeGen/aarch64-cpu-supports-target.c
@@ -9,9 +9,9 @@ int check_all_feature() {
     return 3;
   else if (__builtin_cpu_supports("fcma+rcpc+rcpc2+rcpc3+frintts+dgh"))
     return 4;
-  else if (__builtin_cpu_supports("i8mm+bf16+ebf16+rpres+sve+sve-bf16"))
+  else if (__builtin_cpu_supports("i8mm+bf16+ebf16+rpres+sve"))
     return 5;
-  else if (__builtin_cpu_supports("sve-ebf16+sve-i8mm+f32mm+f64mm"))
+  else if (__builtin_cpu_supports("sve+ebf16+i8mm+f32mm+f64mm"))
     return 6;
   else if (__builtin_cpu_supports("sve2+sve2-aes+sve2-pmull128"))
     return 7;
diff --git a/clang/test/CodeGen/aarch64-debug-types.c b/clang/test/CodeGen/aarch64-debug-types.c
new file mode 100644
index 000000000000000..f1ab74c5c31bdb1
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-debug-types.c
@@ -0,0 +1,12 @@
+// RUN:  %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon  -target-feature +fp8 \
+// RUN:  -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#include<arm_neon.h>
+
+void test_locals(void) {
+  // CHECK-DAG: !DIDerivedType(tag: DW_TAG_typedef, name: "__MFloat8_t", {{.*}}, baseType: ![[ELTTYU8:[0-9]+]]
+  // CHECK-DAG: ![[ELTTYU8]] = !DIBasicType(name: "__MFloat8_t", size: 8, encoding: DW_ATE_unsigned_char)
+  __MFloat8_t mfp8;
+}
diff --git a/clang/test/CodeGen/aarch64-fmv-dependencies.c b/clang/test/CodeGen/aarch64-fmv-dependencies.c
index 6d230007f91ff95..db6be423b99f788 100644
--- a/clang/test/CodeGen/aarch64-fmv-dependencies.c
+++ b/clang/test/CodeGen/aarch64-fmv-dependencies.c
@@ -135,15 +135,6 @@ __attribute__((target_version("ssbs"))) int fmv(void) { return 0; }
 // CHECK: define dso_local i32 @fmv._Msve() #[[sve:[0-9]+]] {
 __attribute__((target_version("sve"))) int fmv(void) { return 0; }
 
-// CHECK: define dso_local i32 @fmv._Msve-bf16() #[[sve_bf16_ebf16:[0-9]+]] {
-__attribute__((target_version("sve-bf16"))) int fmv(void) { return 0; }
-
-// CHECK: define dso_local i32 @fmv._Msve-ebf16() #[[sve_bf16_ebf16:[0-9]+]] {
-__attribute__((target_version("sve-ebf16"))) int fmv(void) { return 0; }
-
-// CHECK: define dso_local i32 @fmv._Msve-i8mm() #[[sve_i8mm:[0-9]+]] {
-__attribute__((target_version("sve-i8mm"))) int fmv(void) { return 0; }
-
 // CHECK: define dso_local i32 @fmv._Msve2() #[[sve2:[0-9]+]] {
 __attribute__((target_version("sve2"))) int fmv(void) { return 0; }
 
@@ -209,8 +200,6 @@ int caller() {
 // CHECK: attributes #[[sme2]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+neon,+outline-atomics,+sme,+sme2,+v8a"
 // CHECK: attributes #[[ssbs]] = { {{.*}} "target-features"="+fp-armv8,+neon,+outline-atomics,+ssbs,+v8a"
 // CHECK: attributes #[[sve]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a"
-// CHECK: attributes #[[sve_bf16_ebf16]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a"
-// CHECK: attributes #[[sve_i8mm]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+i8mm,+neon,+outline-atomics,+sve,+v8a"
 // CHECK: attributes #[[sve2]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+v8a"
 // CHECK: attributes #[[sve2_aes]] = { {{.*}} "target-features"="+aes,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2-aes,+v8a"
 // CHECK: attributes #[[sve2_bitperm]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2-bitperm,+v8a"
diff --git a/clang/test/CodeGen/aarch64-fpm-helpers.c b/clang/test/CodeGen/aarch64-fpm-helpers.c
new file mode 100644
index 000000000000000..4bced01d5c71fa9
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-fpm-helpers.c
@@ -0,0 +1,165 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+
+// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c   -DUSE_NEON_H  %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c   -DUSE_SVE_H   %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c   -DUSE_SME_H   %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_NEON_H  %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_SVE_H   %s -o - | FileCheck %s
+// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_SME_H   %s -o - | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef USE_NEON_H
+#include "arm_neon.h"
+#endif
+
+#ifdef USE_SVE_H
+#include "arm_sve.h"
+#endif
+
+#ifdef USE_SME_H
+#include "arm_sme.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define INIT_ZERO 0
+#define INIT_ONES 0xffffffffffffffffU
+
+// CHECK-LABEL: define dso_local noundef i64 @test_init(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 0
+//
+fpm_t test_init() { return __arm_fpm_init(); }
+
+// CHECK-LABEL: define dso_local noundef i64 @test_src1_1(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 -8
+//
+fpm_t test_src1_1() {
+  return __arm_set_fpm_src1_format(INIT_ONES, __ARM_FPM_E5M2);
+}
+
+// CHECK-LABEL: define dso_local noundef i64 @test_src1_2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 1
+//
+fpm_t test_src1_2() {
+  return __arm_set_fpm_src1_format(INIT_ZERO, __ARM_FPM_E4M3);
+}
+
+// CHECK-LABEL: define dso_local noundef i64 @test_src2_1(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 -57
+//
+fpm_t test_src2_1() {
+  return __arm_set_fpm_src2_format(INIT_ONES, __ARM_FPM_E5M2);
+}
+
+// CHECK-LABEL: define dso_local noundef i64 @test_src2_2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 8
+//
+fpm_t test_src2_2() {
+  return __arm_set_fpm_src2_format(INIT_ZERO, __ARM_FPM_E4M3);
+}
+
+// CHECK-LABEL: define dso_local noundef i64 @test_dst1_1(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 -449
+//
+fpm_t test_dst1_1() {
+  return __arm_set_fpm_dst_format(INIT_ONES, __ARM_FPM_E5M2);
+}
+
+// CHECK-LABEL: define dso_local noundef i64 @test_dst2_2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 64
+//
+fpm_t test_dst2_2() {
+  return __arm_set_fpm_dst_format(INIT_ZERO, __ARM_FPM_E4M3);
+}
+
+// CHECK-LABEL: define dso_local noundef i64 @test_of_mul_1(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 -16385
+//
+fpm_t test_of_mul_1() {
+  return __arm_set_fpm_overflow_mul(INIT_ONES, __ARM_FPM_INFNAN);
+}
+
+// CHECK-LABEL: define dso_local noundef i64 @test_of_mul_2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 16384
+//
+fpm_t test_of_mul_2() {
+  return __arm_set_fpm_overflow_mul(INIT_ZERO, __ARM_FPM_SATURATE);
+}
+
+// CHECK-LABEL: define dso_local noundef i64 @test_of_cvt_1(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 -32769
+//
+fpm_t test_of_cvt_1() {
+  return __arm_set_fpm_overflow_cvt(INIT_ONES, __ARM_FPM_INFNAN);
+}
+
+// CHECK-LABEL: define dso_local noundef i64 @test_of_cvt_2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 32768
+//
+fpm_t test_of_cvt_2() {
+  return __arm_set_fpm_overflow_cvt(INIT_ZERO, __ARM_FPM_SATURATE);
+}
+
+// CHECK-LABEL: define dso_local noundef i64 @test_lscale(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 8323072
+//
+fpm_t test_lscale() { return __arm_set_fpm_lscale(INIT_ZERO, 127); }
+
+// CHECK-LABEL: define dso_local noundef i64 @test_lscale2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 270582939648
+//
+fpm_t test_lscale2() { return __arm_set_fpm_lscale2(INIT_ZERO, 63); }
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_1(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 2147483648
+//
+fpm_t test_nscale_1() { return __arm_set_fpm_nscale(INIT_ZERO, -128); }
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_2(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 2130706432
+//
+fpm_t test_nscale_2() { return __arm_set_fpm_nscale(INIT_ZERO, 127); }
+
+// CHECK-LABEL: define dso_local noundef range(i64 0, 4294967296) i64 @test_nscale_3(
+// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret i64 4278190080
+//
+fpm_t test_nscale_3() { return __arm_set_fpm_nscale(INIT_ZERO, -1); }
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/clang/test/CodeGen/aarch64-pure-scalable-args-empty-union.c b/clang/test/CodeGen/aarch64-pure-scalable-args-empty-union.c
new file mode 100644
index 000000000000000..546910068c78a22
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-pure-scalable-args-empty-union.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1        -O3 -triple aarch64 -target-feature +sve -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK-C
+// RUN: %clang_cc1 -x c++ -O3 -triple aarch64 -target-feature +sve -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK-CXX
+
+typedef __SVFloat32_t fvec32 __attribute__((arm_sve_vector_bits(128)));
+
+// PST containing an empty union: when compiled as C pass it in registers,
+// when compiled as C++ - in memory.
+typedef struct {
+  fvec32 x[4];
+  union {} u;
+} S0;
+
+#ifdef __cplusplus
+extern "C"
+#endif
+void use0(S0);
+
+void f0(S0 *p) {
+  use0(*p);
+}
+// CHECK-C:   declare void @use0(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+// CHECK-CXX: declare void @use0(ptr noundef)
+
+#ifdef __cplusplus
+
+// PST containing an empty union with `[[no_unique_address]]` - pass in registers.
+typedef struct {
+   fvec32 x[4];
+   [[no_unique_address]]
+   union {} u;
+} S1;
+
+extern "C" void use1(S1);
+void f1(S1 *p) {
+  use1(*p);
+}
+// CHECK-CXX: declare void @use1(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+#endif // __cplusplus
diff --git a/clang/test/CodeGen/aarch64-pure-scalable-args.c b/clang/test/CodeGen/aarch64-pure-scalable-args.c
new file mode 100644
index 000000000000000..851159ada767495
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-pure-scalable-args.c
@@ -0,0 +1,461 @@
+// RUN: %clang_cc1 -O3 -triple aarch64                                  -target-feature +sve -target-feature +sve2p1 -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS
+// RUN: %clang_cc1 -O3 -triple arm64-apple-ios7.0 -target-abi darwinpcs -target-feature +sve -target-feature +sve2p1 -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-DARWIN
+// RUN: %clang_cc1 -O3 -triple aarch64-linux-gnu                        -target-feature +sve -target-feature +sve2p1 -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-AAPCS
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+#include <arm_sve.h>
+#include <stdarg.h>
+
+typedef svfloat32_t fvec32 __attribute__((arm_sve_vector_bits(128)));
+typedef svfloat64_t fvec64 __attribute__((arm_sve_vector_bits(128)));
+typedef svbool_t bvec __attribute__((arm_sve_vector_bits(128)));
+typedef svmfloat8_t mfvec8 __attribute__((arm_sve_vector_bits(128)));
+
+typedef struct {
+    float f[4];
+} HFA;
+
+typedef struct {
+    mfloat8x16_t f[4];
+} HVA;
+
+// Pure Scalable Type, needs 4 Z-regs, 2 P-regs
+typedef struct {
+     bvec a;
+     fvec64 x;
+     fvec32 y[2];
+     mfvec8 z;
+     bvec b;
+} PST;
+
+// Pure Scalable Type, 1 Z-reg
+typedef struct {
+    fvec32 x;
+} SmallPST;
+
+// Big PST, does not fit in registers.
+typedef struct {
+    struct {
+        bvec a;
+        fvec32 x[4];
+    } u[2];
+    fvec64 v;
+} BigPST;
+
+// A small aggregate type
+typedef struct  {
+    char data[16];
+} SmallAgg;
+
+// CHECK: %struct.PST = type { <2 x i8>, <2 x double>, [2 x <4 x float>], <16 x i8>, <2 x i8> }
+
+// Test argument passing of Pure Scalable Types by examining the generated
+// LLVM IR function declarations. A PST argument in C/C++ should map to:
+//   a) a `ptr` argument, if passed indirectly through memory
+//   b) a series of scalable vector arguments, if passed via registers
+
+// Simple argument passing, PST expanded into registers.
+//   a    -> p0
+//   b    -> p1
+//   x    -> q0
+//   y[0] -> q1
+//   y[1] -> q2
+//   z    -> q3
+void test_argpass_simple(PST *p) {
+    void argpass_simple_callee(PST);
+    argpass_simple_callee(*p);
+}
+// CHECK-AAPCS:      define dso_local void @test_argpass_simple(ptr nocapture noundef readonly %p)
+// CHECK-AAPCS-NEXT: entry:
+// CHECK-AAPCS-NEXT: %0 = load <2 x i8>, ptr %p, align 16
+// CHECK-AAPCS-NEXT: %cast.scalable = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %0, i64 0)
+// CHECK-AAPCS-NEXT: %1 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
+// CHECK-AAPCS-NEXT: %2 = getelementptr inbounds nuw i8, ptr %p, i64 16
+// CHECK-AAPCS-NEXT: %3 = load <2 x double>, ptr %2, align 16
+// CHECK-AAPCS-NEXT: %cast.scalable1 = tail call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> %3, i64 0)
+// CHECK-AAPCS-NEXT: %4 = getelementptr inbounds nuw i8, ptr %p, i64 32
+// CHECK-AAPCS-NEXT: %5 = load <4 x float>, ptr %4, align 16
+// CHECK-AAPCS-NEXT: %cast.scalable2 = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %5, i64 0)
+// CHECK-AAPCS-NEXT: %6 = getelementptr inbounds nuw i8, ptr %p, i64 48
+// CHECK-AAPCS-NEXT: %7 = load <4 x float>, ptr %6, align 16
+// CHECK-AAPCS-NEXT: %cast.scalable3 = tail call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %7, i64 0)
+// CHECK-AAPCS-NEXT: %8 = getelementptr inbounds nuw i8, ptr %p, i64 64
+// CHECK-AAPCS-NEXT: %9 = load <16 x i8>, ptr %8, align 16
+// CHECK-AAPCS-NEXT: %cast.scalable4 = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> %9, i64 0)
+// CHECK-AAPCS-NEXT: %10 = getelementptr inbounds nuw i8, ptr %p, i64 80
+// CHECK-AAPCS-NEXT: %11 = load <2 x i8>, ptr %10, align 16
+// CHECK-AAPCS-NEXT: %cast.scalable5 = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %11, i64 0)
+// CHECK-AAPCS-NEXT: %12 = bitcast <vscale x 2 x i8> %cast.scalable5 to <vscale x 16 x i1>
+// CHECK-AAPCS-NEXT: tail call void @argpass_simple_callee(<vscale x 16 x i1> %1, <vscale x 2 x double> %cast.scalable1, <vscale x 4 x float> %cast.scalable2, <vscale x 4 x float> %cast.scalable3, <vscale x 16 x i8> %cast.scalable4, <vscale x 16 x i1> %12)
+// CHECK-AAPCS-NEXT: ret void
+
+// CHECK-AAPCS:  declare void @argpass_simple_callee(<vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>)
+// CHECK-DARWIN: declare void @argpass_simple_callee(ptr noundef)
+
+// Boundary case of using the last available Z-reg, PST expanded.
+//   0.0  -> d0-d3
+//   a    -> p0
+//   b    -> p1
+//   x    -> q4
+//   y[0] -> q5
+//   y[1] -> q6
+//   z    -> q7
+void test_argpass_last_z(PST *p) {
+    void argpass_last_z_callee(double, double, double, double, PST);
+    argpass_last_z_callee(.0, .0, .0, .0, *p);
+}
+// CHECK-AAPCS:  declare void @argpass_last_z_callee(double noundef, double noundef, double noundef, double noundef, <vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>)
+// CHECK-DARWIN: declare void @argpass_last_z_callee(double noundef, double noundef, double noundef, double noundef, ptr noundef)
+
+
+// Like the above, but using a tuple type to occupy some registers.
+//   x    -> z0.d-z3.d
+//   a    -> p0
+//   b    -> p1
+//   x    -> q4
+//   y[0] -> q5
+//   y[1] -> q6
+//   z    -> q7
+void test_argpass_last_z_tuple(PST *p, svfloat64x4_t x) {
+  void argpass_last_z_tuple_callee(svfloat64x4_t, PST);
+  argpass_last_z_tuple_callee(x, *p);
+}
+// CHECK-AAPCS:  declare void @argpass_last_z_tuple_callee(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>)
+// CHECK-DARWIN: declare void @argpass_last_z_tuple_callee(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, ptr noundef)
+
+
+// Boundary case of using the last available P-reg, PST expanded.
+//   false -> p0-p1
+//   a     -> p2
+//   b     -> p3
+//   x     -> q0
+//   y[0]  -> q1
+//   y[1]  -> q2
+//   z     -> q3
+void test_argpass_last_p(PST *p) {
+    void argpass_last_p_callee(svbool_t, svcount_t, PST);
+    argpass_last_p_callee(svpfalse(), svpfalse_c(), *p);
+}
+// CHECK-AAPCS:  declare void @argpass_last_p_callee(<vscale x 16 x i1>, target("aarch64.svcount"), <vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>)
+// CHECK-DARWIN: declare void @argpass_last_p_callee(<vscale x 16 x i1>, target("aarch64.svcount"), ptr noundef)
+
+
+// Not enough Z-regs, push PST to memory and pass a pointer, Z-regs and
+// P-regs still available for other arguments
+//   u     -> z0
+//   v     -> q1
+//   w     -> q2
+//   0.0   -> d3-d4
+//   1     -> w0
+//   *p    -> memory, address -> x1
+//   2     -> w2
+//   3.0   -> d5
+//   true  -> p0
+void test_argpass_no_z(PST *p, double dummy, svmfloat8_t u, int8x16_t v, mfloat8x16_t w) {
+    void argpass_no_z_callee(svmfloat8_t, int8x16_t, mfloat8x16_t, double, double, int, PST, int, double, svbool_t);
+    argpass_no_z_callee(u, v, w, .0, .0, 1, *p, 2, 3.0, svptrue_b64());
+}
+// CHECK: declare void @argpass_no_z_callee(<vscale x 16 x i8>, <16 x i8> noundef, <16 x i8>, double noundef, double noundef, i32 noundef, ptr noundef, i32 noundef, double noundef, <vscale x 16 x i1>)
+
+
+// Like the above, using a tuple to occupy some registers.
+//   x     -> z0.d-z3.d
+//   0.0   -> d4
+//   1     -> w0
+//   *p    -> memory, address -> x1
+//   2     -> w2
+//   3.0   -> d5
+//   true  -> p0
+void test_argpass_no_z_tuple_f64(PST *p, float dummy, svfloat64x4_t x) {
+  void argpass_no_z_tuple_f64_callee(svfloat64x4_t, double, int, PST, int,
+                                     double, svbool_t);
+  argpass_no_z_tuple_f64_callee(x, .0, 1, *p, 2, 3.0, svptrue_b64());
+}
+// CHECK: declare void @argpass_no_z_tuple_f64_callee(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, double noundef, i32 noundef, ptr noundef, i32 noundef, double noundef, <vscale x 16 x i1>)
+
+
+// Likewise, using a different tuple.
+//   x     -> z0.d-z3.d
+//   0.0   -> d4
+//   1     -> w0
+//   *p    -> memory, address -> x1
+//   2     -> w2
+//   3.0   -> d5
+//   true  -> p0
+void test_argpass_no_z_tuple_mfp8(PST *p, float dummy, svmfloat8x4_t x) {
+  void argpass_no_z_tuple_mfp8_callee(svmfloat8x4_t, double, int, PST, int,
+                                      double, svbool_t);
+  argpass_no_z_tuple_mfp8_callee(x, .0, 1, *p, 2, 3.0, svptrue_b64());
+}
+// CHECK: declare void @argpass_no_z_tuple_mfp8_callee(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, double noundef, i32 noundef, ptr noundef, i32 noundef, double noundef, <vscale x 16 x i1>)
+
+
+// Not enough Z-regs (consumed by a HFA), PST passed indirectly
+//   0.0  -> d0
+//   *h   -> s1-s4
+//   1    -> w0
+//   *p   -> memory, address -> x1
+//   p    -> x1
+//   2    -> w2
+//   true -> p0
+void test_argpass_no_z_hfa(HFA *h, PST *p) {
+    void argpass_no_z_hfa_callee(double, HFA, int, PST, int, svbool_t);
+    argpass_no_z_hfa_callee(.0, *h, 1, *p, 2, svptrue_b64());
+}
+// CHECK-AAPCS:  declare void @argpass_no_z_hfa_callee(double noundef, [4 x float] alignstack(8), i32 noundef, ptr noundef, i32 noundef, <vscale x 16 x i1>)
+// CHECK-DARWIN: declare void @argpass_no_z_hfa_callee(double noundef, [4 x float], i32 noundef, ptr noundef, i32 noundef, <vscale x 16 x i1>)
+
+// Not enough Z-regs (consumed by a HVA), PST passed indirectly
+//   0.0  -> d0
+//   *h   -> q1-q4
+//   1    -> w0
+//   *p   -> memory, address -> x1
+//   p    -> x1
+//   2    -> w2
+//   true -> p0
+void test_argpass_no_z_hva(HVA *h, PST *p) {
+    void argpass_no_z_hva_callee(double, HVA, int, PST, int, svbool_t);
+    argpass_no_z_hva_callee(.0, *h, 1, *p, 2, svptrue_b64());
+}
+// CHECK-AAPCS:  declare void @argpass_no_z_hva_callee(double noundef, [4 x <16 x i8>] alignstack(16), i32 noundef, ptr noundef, i32 noundef, <vscale x 16 x i1>)
+// CHECK-DARWIN: declare void @argpass_no_z_hva_callee(double noundef, [4 x <16 x i8>], i32 noundef, ptr noundef, i32 noundef, <vscale x 16 x i1>)
+
+// Not enough P-regs, PST passed indirectly, Z-regs and P-regs still available.
+//   true -> p0-p2
+//   1    -> w0
+//   *p   -> memory, address -> x1
+//   2    -> w2
+//   3.0  -> d0
+//   true -> p3
+void test_argpass_no_p(PST *p) {
+    void argpass_no_p_callee(svbool_t, svbool_t, svbool_t, int, PST, int, double, svbool_t);
+    argpass_no_p_callee(svptrue_b8(), svptrue_b16(), svptrue_b32(), 1, *p, 2, 3.0, svptrue_b64());
+}
+// CHECK: declare void @argpass_no_p_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32 noundef, ptr noundef, i32 noundef, double noundef, <vscale x 16 x i1>)
+
+
+// Like above, using a tuple to occupy some registers.
+// P-regs still available.
+//   v    -> p0-p1
+//   u    -> p2
+//   1    -> w0
+//   *p   -> memory, address -> x1
+//   2    -> w2
+//   3.0  -> d0
+//   true -> p3
+void test_argpass_no_p_tuple(PST *p, svbool_t u, svboolx2_t v) {
+  void argpass_no_p_tuple_callee(svboolx2_t, svbool_t, int, PST, int, double,
+                                 svbool_t);
+  argpass_no_p_tuple_callee(v, u, 1, *p, 2, 3.0, svptrue_b64());
+}
+// CHECK: declare void @argpass_no_p_tuple_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, i32 noundef, ptr noundef, i32 noundef, double noundef, <vscale x 16 x i1>)
+
+
+// HFAs go back-to-back to memory, afterwards Z-regs not available, PST passed indirectly.
+//   0.0   -> d0-d4
+//   *h    -> memory
+//   *p    -> memory, address -> x0
+//   *h    -> memory
+//   false -> p0
+void test_after_hfa(HFA *h, PST *p) {
+    void after_hfa_callee(double, double, double, double, double, HFA, PST, HFA, svbool_t);
+    after_hfa_callee(.0, .0, .0, .0, .0, *h, *p, *h, svpfalse());
+}
+// CHECK-AAPCS:  declare void @after_hfa_callee(double noundef, double noundef, double noundef, double noundef, double noundef, [4 x float] alignstack(8), ptr noundef, [4 x float] alignstack(8), <vscale x 16 x i1>)
+// CHECK-DARWIN: declare void @after_hfa_callee(double noundef, double noundef, double noundef, double noundef, double noundef, [4 x float], ptr noundef, [4 x float], <vscale x 16 x i1>)
+
+// Small PST, not enough registers, passed indirectly, unlike other small
+// aggregates.
+//   *s  -> x0-x1
+//   0.0 -> d0-d7
+//   *p  -> memory, address -> x2
+//   1.0 -> memory
+//   2.0 -> memory (next to the above)
+void test_small_pst(SmallPST *p, SmallAgg *s) {
+    void small_pst_callee(SmallAgg, double, double, double, double, double, double, double, double, double, SmallPST, double);
+    small_pst_callee(*s, .0, .0, .0, .0, .0, .0, .0, .0, 1.0, *p, 2.0);
+}
+// CHECK-AAPCS:  declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, ptr noundef, double noundef)
+// CHECK-DARWIN: declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, i128, double noundef)
+
+
+// Simple return, PST expanded to registers
+//   p->a    -> p0
+//   p->x    -> q0
+//   p->y[0] -> q1
+//   p->y[1] -> q2
+//   p->z    -> q3
+//   p->b    -> p1
+PST test_return(PST *p) {
+    return *p;
+}
+// CHECK-AAPCS:  define dso_local <{ <vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1> }> @test_return(ptr
+// CHECK-DARWIN: define void @test_return(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.PST) align 16 %agg.result, ptr nocapture noundef readonly %p)
+
+// Corner case of 1-element aggregate
+//   p->x -> q0
+SmallPST test_return_small_pst(SmallPST *p) {
+    return *p;
+}
+// CHECK-AAPCS:  define dso_local <vscale x 4 x float> @test_return_small_pst(ptr
+// CHECK-DARWIN: define i128 @test_return_small_pst(ptr nocapture noundef readonly %p)
+
+
+// Big PST, returned indirectly
+//   *p -> *x8
+BigPST test_return_big_pst(BigPST *p) {
+    return *p;
+}
+// CHECK-AAPCS:  define dso_local void @test_return_big_pst(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.BigPST) align 16 %agg.result, ptr nocapture noundef readonly %p)
+// CHECK-DARWIN: define void @test_return_big_pst(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.BigPST) align 16 %agg.result, ptr nocapture noundef readonly %p)
+
+// Variadic arguments are unnamed, PST passed indirectly.
+// (Passing SVE types to a variadic function currently unsupported by
+// the AArch64 backend)
+//   p->a    -> p0
+//   p->x    -> q0
+//   p->y[0] -> q1
+//   p->y[1] -> q2
+//   p->z    -> q3
+//   p->b    -> p1
+//   *q -> memory, address -> x1
+void test_pass_variadic(PST *p, PST *q) {
+    void pass_variadic_callee(PST, ...);
+    pass_variadic_callee(*p, *q);
+}
+// CHECK-AAPCS: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp, ptr noundef nonnull align 16 dereferenceable(96) %q, i64 96, i1 false)
+// CHECK-AAPCS: call void (<vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>, ...) @pass_variadic_callee(<vscale x 16 x i1> %1, <vscale x 2 x double> %cast.scalable1, <vscale x 4 x float> %cast.scalable2, <vscale x 4 x float> %cast.scalable3, <vscale x 16 x i8> %cast.scalable4, <vscale x 16 x i1> %12, ptr noundef nonnull %byval-temp)
+
+// CHECK-DARWIN: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp, ptr noundef nonnull align 16 dereferenceable(96) %p, i64 96, i1 false)
+// CHECK-DARWIN: call void @llvm.lifetime.start.p0(i64 96, ptr nonnull %byval-temp1)
+// CHECK-DARWIN: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp1, ptr noundef nonnull align 16 dereferenceable(96) %q, i64 96, i1 false)
+// CHECK-DARWIN: call void (ptr, ...) @pass_variadic_callee(ptr noundef nonnull %byval-temp, ptr noundef nonnull %byval-temp1)
+
+
+// Test passing a small PST, still passed indirectly, despite being <= 128 bits
+void test_small_pst_variadic(SmallPST *p) {
+    void small_pst_variadic_callee(int, ...);
+    small_pst_variadic_callee(0, *p);
+}
+// CHECK-AAPCS: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) %byval-temp, ptr noundef nonnull align 16 dereferenceable(16) %p, i64 16, i1 false)
+// CHECK-AAPCS: call void (i32, ...) @small_pst_variadic_callee(i32 noundef 0, ptr noundef nonnull %byval-temp)
+
+// CHECK-DARWIN: %0 = load i128, ptr %p, align 16
+// CHECK-DARWIN: tail call void (i32, ...) @small_pst_variadic_callee(i32 noundef 0, i128 %0)
+
+// Test handling of a PST argument when passed in registers, from the callee side.
+void test_argpass_callee_side(PST v) {
+    void use(PST *p);
+    use(&v);
+}
+// CHECK-AAPCS:      define dso_local void @test_argpass_callee_side(<vscale x 16 x i1> %0, <vscale x 2 x double> %.coerce1, <vscale x 4 x float> %.coerce3, <vscale x 4 x float> %.coerce5, <vscale x 16 x i8> %.coerce7, <vscale x 16 x i1> %1)
+// CHECK-AAPCS-NEXT: entry:
+// CHECK-AAPCS-NEXT:   %v = alloca %struct.PST, align 16
+// CHECK-AAPCS-NEXT:   %.coerce = bitcast <vscale x 16 x i1> %0 to <vscale x 2 x i8>
+// CHECK-AAPCS-NEXT:   %cast.fixed = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %.coerce, i64 0)
+// CHECK-AAPCS-NEXT:   store <2 x i8> %cast.fixed, ptr %v, align 16
+// CHECK-AAPCS-NEXT:   %2 = getelementptr inbounds nuw i8, ptr %v, i64 16
+// CHECK-AAPCS-NEXT:   %cast.fixed2 = tail call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> %.coerce1, i64 0)
+// CHECK-AAPCS-NEXT:   store <2 x double> %cast.fixed2, ptr %2, align 16
+// CHECK-AAPCS-NEXT:   %3 = getelementptr inbounds nuw i8, ptr %v, i64 32
+// CHECK-AAPCS-NEXT:   %cast.fixed4 = tail call <4 x float> @llvm.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> %.coerce3, i64 0)
+// CHECK-AAPCS-NEXT:   store <4 x float> %cast.fixed4, ptr %3, align 16
+// CHECK-AAPCS-NEXT:   %4 = getelementptr inbounds nuw i8, ptr %v, i64 48
+// CHECK-AAPCS-NEXT:   %cast.fixed6 = tail call <4 x float> @llvm.vector.extract.v4f32.nxv4f32(<vscale x 4 x float> %.coerce5, i64 0)
+// CHECK-AAPCS-NEXT:   store <4 x float> %cast.fixed6, ptr %4, align 16
+// CHECK-AAPCS-NEXT:   %5 = getelementptr inbounds nuw i8, ptr %v, i64 64
+// CHECK-AAPCS-NEXT:   %cast.fixed8 = tail call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> %.coerce7, i64 0)
+// CHECK-AAPCS-NEXT:   store <16 x i8> %cast.fixed8, ptr %5, align 16
+// CHECK-AAPCS-NEXT:   %6 = getelementptr inbounds nuw i8, ptr %v, i64 80
+// CHECK-AAPCS-NEXT:   %.coerce9 = bitcast <vscale x 16 x i1> %1 to <vscale x 2 x i8>
+// CHECK-AAPCS-NEXT:   %cast.fixed10 = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %.coerce9, i64 0)
+// CHECK-AAPCS-NEXT:   store <2 x i8> %cast.fixed10, ptr %6, align 16
+// CHECK-AAPCS-NEXT:   call void @use(ptr noundef nonnull %v)
+// CHECK-AAPCS-NEXT:   ret void
+// CHECK-AAPCS-NEXT: }
+
+// Test va_arg operation
+#ifdef __cplusplus
+ extern "C"
+#endif
+void test_va_arg(int n, ...) {
+     va_list ap;
+     va_start(ap, n);  
+     PST v = va_arg(ap, PST);
+     va_end(ap);
+
+     void use1(bvec, fvec32);
+     use1(v.a, v.y[1]);
+}
+// CHECK-AAPCS: define dso_local void @test_va_arg(i32 noundef %n, ...)
+// CHECK-AAPCS-NEXT: entry:
+// CHECK-AAPCS-NEXT:   %ap = alloca %struct.__va_list, align 8
+// CHECK-AAPCS-NEXT:   call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %ap)
+// CHECK-AAPCS-NEXT:   call void @llvm.va_start.p0(ptr nonnull %ap)
+// CHECK-AAPCS-NEXT:   %gr_offs_p = getelementptr inbounds nuw i8, ptr %ap, i64 24
+// CHECK-AAPCS-NEXT:   %gr_offs = load i32, ptr %gr_offs_p, align 8
+// CHECK-AAPCS-NEXT:   %0 = icmp sgt i32 %gr_offs, -1
+// CHECK-AAPCS-NEXT:   br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
+// CHECK-AAPCS-EMPTY:
+// CHECK-AAPCS-NEXT: vaarg.maybe_reg:                                  ; preds = %entry
+
+// Increment by 8, size of the pointer to the argument value, not size of the argument value itself.
+
+// CHECK-AAPCS-NEXT:   %new_reg_offs = add nsw i32 %gr_offs, 8
+// CHECK-AAPCS-NEXT:   store i32 %new_reg_offs, ptr %gr_offs_p, align 8
+// CHECK-AAPCS-NEXT:   %inreg = icmp ult i32 %gr_offs, -7
+// CHECK-AAPCS-NEXT:   br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
+// CHECK-AAPCS-EMPTY:
+// CHECK-AAPCS-NEXT: vaarg.in_reg:                                     ; preds = %vaarg.maybe_reg
+// CHECK-AAPCS-NEXT:   %reg_top_p = getelementptr inbounds nuw i8, ptr %ap, i64 8
+// CHECK-AAPCS-NEXT:   %reg_top = load ptr, ptr %reg_top_p, align 8
+// CHECK-AAPCS-NEXT:   %1 = sext i32 %gr_offs to i64
+// CHECK-AAPCS-NEXT:   %2 = getelementptr inbounds i8, ptr %reg_top, i64 %1
+// CHECK-AAPCS-NEXT:   br label %vaarg.end
+// CHECK-AAPCS-EMPTY:
+// CHECK-AAPCS-NEXT: vaarg.on_stack:                                   ; preds = %vaarg.maybe_reg, %entry
+// CHECK-AAPCS-NEXT:   %stack = load ptr, ptr %ap, align 8
+// CHECK-AAPCS-NEXT:   %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
+// CHECK-AAPCS-NEXT:   store ptr %new_stack, ptr %ap, align 8
+// CHECK-AAPCS-NEXT:   br label %vaarg.end
+// CHECK-AAPCS-EMPTY:
+// CHECK-AAPCS-NEXT: vaarg.end:                                        ; preds = %vaarg.on_stack, %vaarg.in_reg
+// CHECK-AAPCS-NEXT:   %vaargs.addr = phi ptr [ %2, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
+
+// Extra indirection, for a composite passed indirectly.
+// CHECK-AAPCS-NEXT:   %vaarg.addr = load ptr, ptr %vaargs.addr, align 8
+
+// CHECK-AAPCS-NEXT:   %v.sroa.0.0.copyload = load <2 x i8>, ptr %vaarg.addr, align 16
+// CHECK-AAPCS-NEXT:   %v.sroa.43.0.vaarg.addr.sroa_idx = getelementptr inbounds i8, ptr %vaarg.addr, i64 48
+// CHECK-AAPCS-NEXT:   %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0.vaarg.addr.sroa_idx, align 16
+// CHECK-AAPCS-NEXT:   call void @llvm.va_end.p0(ptr nonnull %ap)
+// CHECK-AAPCS-NEXT:   %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %v.sroa.0.0.copyload, i64 0)
+// CHECK-AAPCS-NEXT:   %3 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
+// CHECK-AAPCS-NEXT:   %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %v.sroa.43.0.copyload, i64 0)
+// CHECK-AAPCS-NEXT:   call void @use1(<vscale x 16 x i1> noundef %3, <vscale x 4 x float> noundef %cast.scalable2)
+// CHECK-AAPCS-NEXT:   call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %ap)
+// CHECK-AAPCS-NEXT:   ret void
+// CHECK-AAPCS-NEXT: }
+
+// CHECK-DARWIN: define void @test_va_arg(i32 noundef %n, ...)
+// CHECK-DARWIN-NEXT: entry:
+// CHECK-DARWIN-NEXT:   %ap = alloca ptr, align 8
+// CHECK-DARWIN-NEXT:   call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ap)
+// CHECK-DARWIN-NEXT:   call void @llvm.va_start.p0(ptr nonnull %ap)
+// CHECK-DARWIN-NEXT:   %argp.cur = load ptr, ptr %ap, align 8
+// CHECK-DARWIN-NEXT:   %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 8
+// CHECK-DARWIN-NEXT:   store ptr %argp.next, ptr %ap, align 8
+// CHECK-DARWIN-NEXT:   %0 = load ptr, ptr %argp.cur, align 8
+// CHECK-DARWIN-NEXT:   %v.sroa.0.0.copyload = load <2 x i8>, ptr %0, align 16
+// CHECK-DARWIN-NEXT:   %v.sroa.43.0..sroa_idx = getelementptr inbounds i8, ptr %0, i64 48
+// CHECK-DARWIN-NEXT:   %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0..sroa_idx, align 16
+// CHECK-DARWIN-NEXT:   call void @llvm.va_end.p0(ptr nonnull %ap)
+// CHECK-DARWIN-NEXT:   %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %v.sroa.0.0.copyload, i64 0)
+// CHECK-DARWIN-NEXT:   %1 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
+// CHECK-DARWIN-NEXT:   %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %v.sroa.43.0.copyload, i64 0)
+// CHECK-DARWIN-NEXT:   call void @use1(<vscale x 16 x i1> noundef %1, <vscale x 4 x float> noundef %cast.scalable2)
+// CHECK-DARWIN-NEXT:   call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ap)
+// CHECK-DARWIN-NEXT:   ret void
+// CHECK-DARWIN-NEXT: }
diff --git a/clang/test/CodeGen/amdgpu-barrier-type-debug-info.c b/clang/test/CodeGen/amdgpu-barrier-type-debug-info.c
new file mode 100644
index 000000000000000..f595f1b222c4f65
--- /dev/null
+++ b/clang/test/CodeGen/amdgpu-barrier-type-debug-info.c
@@ -0,0 +1,8 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn -emit-llvm -o - %s -debug-info-kind=limited 2>&1 | FileCheck %s
+
+// CHECK: name: "__amdgpu_named_workgroup_barrier_t",{{.*}}baseType: ![[BT:[0-9]+]]
+// CHECK: [[BT]] = !DIBasicType(name: "__amdgpu_named_workgroup_barrier_t", size: 128, encoding: DW_ATE_unsigned)
+void test_locals(void) {
+  __amdgpu_named_workgroup_barrier_t k0;
+}
diff --git a/clang/test/CodeGen/arm-mfp8.c b/clang/test/CodeGen/arm-mfp8.c
index 35ec24c8a7880db..8c817fd5be1c9be 100644
--- a/clang/test/CodeGen/arm-mfp8.c
+++ b/clang/test/CodeGen/arm-mfp8.c
@@ -47,5 +47,39 @@ mfloat8x8_t test_ret_mfloat8x8_t(mfloat8x8_t v) {
   return v;
 }
 
+// CHECK-C-LABEL: define dso_local <1 x i8> @func1n(
+// CHECK-C-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] {
+// CHECK-C-NEXT:  [[ENTRY:.*:]]
+// CHECK-C-NEXT:    [[MFP8_ADDR:%.*]] = alloca <1 x i8>, align 1
+// CHECK-C-NEXT:    [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1
+// CHECK-C-NEXT:    store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1
+// CHECK-C-NEXT:    [[TMP0:%.*]] = load <1 x i8>, ptr [[MFP8_ADDR]], align 1
+// CHECK-C-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2
+// CHECK-C-NEXT:    store <1 x i8> [[TMP0]], ptr [[ARRAYIDX]], align 1
+// CHECK-C-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2
+// CHECK-C-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1
+// CHECK-C-NEXT:    ret <1 x i8> [[TMP1]]
+//
+// CHECK-CXX-LABEL: define dso_local <1 x i8> @_Z6func1nu11__MFloat8_t(
+// CHECK-CXX-SAME: <1 x i8> [[MFP8:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    [[MFP8_ADDR:%.*]] = alloca <1 x i8>, align 1
+// CHECK-CXX-NEXT:    [[F1N:%.*]] = alloca [10 x <1 x i8>], align 1
+// CHECK-CXX-NEXT:    store <1 x i8> [[MFP8]], ptr [[MFP8_ADDR]], align 1
+// CHECK-CXX-NEXT:    [[TMP0:%.*]] = load <1 x i8>, ptr [[MFP8_ADDR]], align 1
+// CHECK-CXX-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2
+// CHECK-CXX-NEXT:    store <1 x i8> [[TMP0]], ptr [[ARRAYIDX]], align 1
+// CHECK-CXX-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [10 x <1 x i8>], ptr [[F1N]], i64 0, i64 2
+// CHECK-CXX-NEXT:    [[TMP1:%.*]] = load <1 x i8>, ptr [[ARRAYIDX1]], align 1
+// CHECK-CXX-NEXT:    ret <1 x i8> [[TMP1]]
+//
+__mfp8 func1n(__mfp8 mfp8) {
+  __mfp8 f1n[10];
+  f1n[2] = mfp8;
+  return f1n[2];
+}
+
+
+
 //// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 // CHECK: {{.*}}
diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c
index dc0cc429abffd18..cd09e05b25e4cd8 100644
--- a/clang/test/CodeGen/attr-target-version.c
+++ b/clang/test/CodeGen/attr-target-version.c
@@ -27,11 +27,11 @@ int foo() {
 inline int __attribute__((target_version("sha2+aes+f64mm"))) fmv_inline(void) { return 1; }
 inline int __attribute__((target_version("fp16+fcma+rdma+sme+ fp16 "))) fmv_inline(void) { return 2; }
 inline int __attribute__((target_version("sha3+i8mm+f32mm"))) fmv_inline(void) { return 12; }
-inline int __attribute__((target_version("dit+sve-ebf16"))) fmv_inline(void) { return 8; }
+inline int __attribute__((target_version("dit+ebf16"))) fmv_inline(void) { return 8; }
 inline int __attribute__((target_version("dpb+rcpc2 "))) fmv_inline(void) { return 6; }
 inline int __attribute__((target_version(" dpb2 + jscvt"))) fmv_inline(void) { return 7; }
 inline int __attribute__((target_version("rcpc+frintts"))) fmv_inline(void) { return 3; }
-inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { return 4; }
+inline int __attribute__((target_version("sve+bf16"))) fmv_inline(void) { return 4; }
 inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; }
 inline int __attribute__((target_version("sve2+sve2-aes+sve2-bitperm"))) fmv_inline(void) { return 9; }
 inline int __attribute__((target_version("sve2-sm4+memtag"))) fmv_inline(void) { return 10; }
@@ -680,7 +680,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de
 //
 //
 // CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MditMsve-ebf16
+// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MditMebf16
 // CHECK-SAME: () #[[ATTR28:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 8
@@ -708,7 +708,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de
 //
 //
 // CHECK: Function Attrs: noinline nounwind optnone
-// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MsveMsve-bf16
+// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mbf16Msve
 // CHECK-SAME: () #[[ATTR32:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 4
@@ -837,20 +837,20 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de
 // CHECK-NEXT:    ret ptr @fmv_inline._Msve2-aesMsve2-sha3
 // CHECK:       resolver_else12:
 // CHECK-NEXT:    [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT:    [[TMP29:%.*]] = and i64 [[TMP28]], 4295098368
-// CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 4295098368
+// CHECK-NEXT:    [[TMP29:%.*]] = and i64 [[TMP28]], 1207959552
+// CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 1207959552
 // CHECK-NEXT:    [[TMP31:%.*]] = and i1 true, [[TMP30]]
 // CHECK-NEXT:    br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]]
 // CHECK:       resolver_return13:
-// CHECK-NEXT:    ret ptr @fmv_inline._MditMsve-ebf16
+// CHECK-NEXT:    ret ptr @fmv_inline._Mbf16Msve
 // CHECK:       resolver_else14:
 // CHECK-NEXT:    [[TMP32:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT:    [[TMP33:%.*]] = and i64 [[TMP32]], 3221225472
-// CHECK-NEXT:    [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 3221225472
+// CHECK-NEXT:    [[TMP33:%.*]] = and i64 [[TMP32]], 268566528
+// CHECK-NEXT:    [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 268566528
 // CHECK-NEXT:    [[TMP35:%.*]] = and i1 true, [[TMP34]]
 // CHECK-NEXT:    br i1 [[TMP35]], label [[RESOLVER_RETURN15:%.*]], label [[RESOLVER_ELSE16:%.*]]
 // CHECK:       resolver_return15:
-// CHECK-NEXT:    ret ptr @fmv_inline._MsveMsve-bf16
+// CHECK-NEXT:    ret ptr @fmv_inline._MditMebf16
 // CHECK:       resolver_else16:
 // CHECK-NEXT:    [[TMP36:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
 // CHECK-NEXT:    [[TMP37:%.*]] = and i64 [[TMP36]], 20971520
diff --git a/clang/test/CodeGen/builtins-nvptx-native-half-type-native.c b/clang/test/CodeGen/builtins-nvptx-native-half-type-native.c
index b594fc876d4b9eb..035c4c6066be247 100644
--- a/clang/test/CodeGen/builtins-nvptx-native-half-type-native.c
+++ b/clang/test/CodeGen/builtins-nvptx-native-half-type-native.c
@@ -52,8 +52,8 @@ typedef __fp16 __fp16v2 __attribute__((ext_vector_type(2)));
 // CHECK: call <2 x half> @llvm.nvvm.fmax.ftz.xorsign.abs.f16x2(<2 x half> {{.*}}, <2 x half> {{.*}})
 // CHECK: call <2 x half> @llvm.nvvm.fmax.nan.xorsign.abs.f16x2(<2 x half> {{.*}}, <2 x half> {{.*}})
 // CHECK: call <2 x half> @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16x2(<2 x half> {{.*}}, <2 x half> {{.*}})
-// CHECK: call half @llvm.nvvm.ldg.global.f.f16.p0(ptr {{.*}}, i32 2)
-// CHECK: call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p0(ptr {{.*}}, i32 4)
+// CHECK: load half, ptr addrspace(1) {{.*}}, align 2, !invariant.load
+// CHECK: load <2 x half>, ptr addrspace(1) {{.*}}, align 4, !invariant.load
 // CHECK: call half @llvm.nvvm.ldu.global.f.f16.p0(ptr {{.*}}, i32 2)
 // CHECK: call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p0(ptr {{.*}}, i32 4)
 __device__ void nvvm_native_half_types(void *a, void*b, void*c, __fp16* out) {
diff --git a/clang/test/CodeGen/builtins-nvptx-native-half-type.c b/clang/test/CodeGen/builtins-nvptx-native-half-type.c
index 4aeae953bc1622c..511497702ff7f9e 100644
--- a/clang/test/CodeGen/builtins-nvptx-native-half-type.c
+++ b/clang/test/CodeGen/builtins-nvptx-native-half-type.c
@@ -177,9 +177,9 @@ typedef __fp16 __fp16v2 __attribute__((ext_vector_type(2)));
 
 // CHECK-LABEL: nvvm_ldg_native_half_types
 __device__ void nvvm_ldg_native_half_types(const void *p) {
-  // CHECK: call half @llvm.nvvm.ldg.global.f.f16.p0
+  // CHECK: load half, ptr addrspace(1) {{.*}}, align 2, !invariant.load
   __nvvm_ldg_h((const __fp16 *)p);
-  // CHECK: call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p0
+  // CHECK: load <2 x half>, ptr addrspace(1) {{.*}}, align 4, !invariant.load
   __nvvm_ldg_h2((const __fp16v2 *)p);
 }
 
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index 0d0e3ecdb90c9e4..3406cbdde2bf880 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -598,33 +598,33 @@ __device__ void nvvm_atom(float *fp, float f, double *dfp, double df,
 
 // CHECK-LABEL: nvvm_ldg
 __device__ void nvvm_ldg(const void *p) {
-  // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1)
-  // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1)
-  // CHECK: call i8 @llvm.nvvm.ldg.global.i.i8.p0(ptr {{%[0-9]+}}, i32 1)
+  // CHECK: load i8, ptr addrspace(1) {{%[0-9]+}}, align 1, !invariant.load
+  // CHECK: load i8, ptr addrspace(1) {{%[0-9]+}}, align 1, !invariant.load
+  // CHECK: load i8, ptr addrspace(1) {{%[0-9]+}}, align 1, !invariant.load
   __nvvm_ldg_c((const char *)p);
   __nvvm_ldg_uc((const unsigned char *)p);
   __nvvm_ldg_sc((const signed char *)p);
 
-  // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2)
-  // CHECK: call i16 @llvm.nvvm.ldg.global.i.i16.p0(ptr {{%[0-9]+}}, i32 2)
+  // CHECK: load i16, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load
+  // CHECK: load i16, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load
   __nvvm_ldg_s((const short *)p);
   __nvvm_ldg_us((const unsigned short *)p);
 
-  // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4)
-  // CHECK: call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4)
+  // CHECK: load i32, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
+  // CHECK: load i32, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
   __nvvm_ldg_i((const int *)p);
   __nvvm_ldg_ui((const unsigned int *)p);
 
-  // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4)
-  // LP32: call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr {{%[0-9]+}}, i32 4)
-  // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0(ptr {{%[0-9]+}}, i32 8)
-  // LP64: call i64 @llvm.nvvm.ldg.global.i.i64.p0(ptr {{%[0-9]+}}, i32 8)
+  // LP32: load i32, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
+  // LP32: load i32, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
+  // LP64: load i64, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
+  // LP64: load i64, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
   __nvvm_ldg_l((const long *)p);
   __nvvm_ldg_ul((const unsigned long *)p);
 
-  // CHECK: call float @llvm.nvvm.ldg.global.f.f32.p0(ptr {{%[0-9]+}}, i32 4)
+  // CHECK: load float, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
   __nvvm_ldg_f((const float *)p);
-  // CHECK: call double @llvm.nvvm.ldg.global.f.f64.p0(ptr {{%[0-9]+}}, i32 8)
+  // CHECK: load double, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
   __nvvm_ldg_d((const double *)p);
 
   // In practice, the pointers we pass to __ldg will be aligned as appropriate
@@ -636,9 +636,9 @@ __device__ void nvvm_ldg(const void *p) {
   // elements, its alignment is set to number of elements times the alignment of
   // its member: n*alignof(t)."
 
-  // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2)
-  // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2)
-  // CHECK: call <2 x i8> @llvm.nvvm.ldg.global.i.v2i8.p0(ptr {{%[0-9]+}}, i32 2)
+  // CHECK: load <2 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load
+  // CHECK: load <2 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load
+  // CHECK: load <2 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 2, !invariant.load
   typedef char char2 __attribute__((ext_vector_type(2)));
   typedef unsigned char uchar2 __attribute__((ext_vector_type(2)));
   typedef signed char schar2 __attribute__((ext_vector_type(2)));
@@ -646,9 +646,9 @@ __device__ void nvvm_ldg(const void *p) {
   __nvvm_ldg_uc2((const uchar2 *)p);
   __nvvm_ldg_sc2((const schar2 *)p);
 
-  // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4)
-  // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4)
-  // CHECK: call <4 x i8> @llvm.nvvm.ldg.global.i.v4i8.p0(ptr {{%[0-9]+}}, i32 4)
+  // CHECK: load <4 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
+  // CHECK: load <4 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
+  // CHECK: load <4 x i8>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
   typedef char char4 __attribute__((ext_vector_type(4)));
   typedef unsigned char uchar4 __attribute__((ext_vector_type(4)));
   typedef signed char schar4 __attribute__((ext_vector_type(4)));
@@ -656,59 +656,59 @@ __device__ void nvvm_ldg(const void *p) {
   __nvvm_ldg_uc4((const uchar4 *)p);
   __nvvm_ldg_sc4((const schar4 *)p);
 
-  // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0(ptr {{%[0-9]+}}, i32 4)
-  // CHECK: call <2 x i16> @llvm.nvvm.ldg.global.i.v2i16.p0(ptr {{%[0-9]+}}, i32 4)
+  // CHECK: load <2 x i16>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
+  // CHECK: load <2 x i16>, ptr addrspace(1) {{%[0-9]+}}, align 4, !invariant.load
   typedef short short2 __attribute__((ext_vector_type(2)));
   typedef unsigned short ushort2 __attribute__((ext_vector_type(2)));
   __nvvm_ldg_s2((const short2 *)p);
   __nvvm_ldg_us2((const ushort2 *)p);
 
-  // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0(ptr {{%[0-9]+}}, i32 8)
-  // CHECK: call <4 x i16> @llvm.nvvm.ldg.global.i.v4i16.p0(ptr {{%[0-9]+}}, i32 8)
+  // CHECK: load <4 x i16>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
+  // CHECK: load <4 x i16>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
   typedef short short4 __attribute__((ext_vector_type(4)));
   typedef unsigned short ushort4 __attribute__((ext_vector_type(4)));
   __nvvm_ldg_s4((const short4 *)p);
   __nvvm_ldg_us4((const ushort4 *)p);
 
-  // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8)
-  // CHECK: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8)
+  // CHECK: load <2 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
+  // CHECK: load <2 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
   typedef int int2 __attribute__((ext_vector_type(2)));
   typedef unsigned int uint2 __attribute__((ext_vector_type(2)));
   __nvvm_ldg_i2((const int2 *)p);
   __nvvm_ldg_ui2((const uint2 *)p);
 
-  // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0(ptr {{%[0-9]+}}, i32 16)
-  // CHECK: call <4 x i32> @llvm.nvvm.ldg.global.i.v4i32.p0(ptr {{%[0-9]+}}, i32 16)
+  // CHECK: load <4 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
+  // CHECK: load <4 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
   typedef int int4 __attribute__((ext_vector_type(4)));
   typedef unsigned int uint4 __attribute__((ext_vector_type(4)));
   __nvvm_ldg_i4((const int4 *)p);
   __nvvm_ldg_ui4((const uint4 *)p);
 
-  // LP32: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8)
-  // LP32: call <2 x i32> @llvm.nvvm.ldg.global.i.v2i32.p0(ptr {{%[0-9]+}}, i32 8)
-  // LP64: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16)
-  // LP64: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16)
+  // LP32: load <2 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
+  // LP32: load <2 x i32>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
+  // LP64: load <2 x i64>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
+  // LP64: load <2 x i64>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
   typedef long long2 __attribute__((ext_vector_type(2)));
   typedef unsigned long ulong2 __attribute__((ext_vector_type(2)));
   __nvvm_ldg_l2((const long2 *)p);
   __nvvm_ldg_ul2((const ulong2 *)p);
 
-  // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16)
-  // CHECK: call <2 x i64> @llvm.nvvm.ldg.global.i.v2i64.p0(ptr {{%[0-9]+}}, i32 16)
+  // CHECK: load <2 x i64>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
+  // CHECK: load <2 x i64>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
   typedef long long longlong2 __attribute__((ext_vector_type(2)));
   typedef unsigned long long ulonglong2 __attribute__((ext_vector_type(2)));
   __nvvm_ldg_ll2((const longlong2 *)p);
   __nvvm_ldg_ull2((const ulonglong2 *)p);
 
-  // CHECK: call <2 x float> @llvm.nvvm.ldg.global.f.v2f32.p0(ptr {{%[0-9]+}}, i32 8)
+  // CHECK: load <2 x float>, ptr addrspace(1) {{%[0-9]+}}, align 8, !invariant.load
   typedef float float2 __attribute__((ext_vector_type(2)));
   __nvvm_ldg_f2((const float2 *)p);
 
-  // CHECK: call <4 x float> @llvm.nvvm.ldg.global.f.v4f32.p0(ptr {{%[0-9]+}}, i32 16)
+  // CHECK: load <4 x float>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
   typedef float float4 __attribute__((ext_vector_type(4)));
   __nvvm_ldg_f4((const float4 *)p);
 
-  // CHECK: call <2 x double> @llvm.nvvm.ldg.global.f.v2f64.p0(ptr {{%[0-9]+}}, i32 16)
+  // CHECK: load <2 x double>, ptr addrspace(1) {{%[0-9]+}}, align 16, !invariant.load
   typedef double double2 __attribute__((ext_vector_type(2)));
   __nvvm_ldg_d2((const double2 *)p);
 }
diff --git a/clang/test/CodeGen/debug-info-renderscript-tag.rs b/clang/test/CodeGen/debug-info-renderscript-tag.rs
deleted file mode 100644
index ded650d9660b8b4..000000000000000
--- a/clang/test/CodeGen/debug-info-renderscript-tag.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-// RUN: %clang -emit-llvm -S -g %s -o - | FileCheck %s
-
-// CHECK: !DICompileUnit(language: DW_LANG_GOOGLE_RenderScript{{.*}})
diff --git a/clang/test/CodeGen/fp16-ops.c b/clang/test/CodeGen/fp16-ops.c
index bfa2a2f7f6c8267..4c206690a7518e8 100644
--- a/clang/test/CodeGen/fp16-ops.c
+++ b/clang/test/CodeGen/fp16-ops.c
@@ -6,8 +6,6 @@
 // RUN:   | FileCheck %s --check-prefix=NATIVE-HALF
 // RUN: %clang_cc1 -emit-llvm -o - -triple aarch64 -fnative-half-type %s \
 // RUN:   | FileCheck %s --check-prefix=NATIVE-HALF
-// RUN: %clang_cc1 -emit-llvm -o - -x renderscript %s \
-// RUN:   | FileCheck %s --check-prefix=NATIVE-HALF
 typedef unsigned cond_t;
 typedef __fp16 float16_t;
 
diff --git a/clang/test/CodeGen/pgo-cold-function-coverage.c b/clang/test/CodeGen/pgo-cold-function-coverage.c
new file mode 100644
index 000000000000000..3003cdc3e15e02c
--- /dev/null
+++ b/clang/test/CodeGen/pgo-cold-function-coverage.c
@@ -0,0 +1,19 @@
+// Test -fprofile-generate-cold-function-coverage 
+
+// RUN: rm -rf %t && split-file %s %t
+// RUN: %clang --target=x86_64 -O2 -fprofile-generate-cold-function-coverage=/xxx/yyy/ -fprofile-sample-accurate -fprofile-sample-use=%t/pgo-cold-func.prof  -S -emit-llvm -o - %t/pgo-cold-func.c | FileCheck %s
+
+// CHECK: @__llvm_profile_filename = {{.*}} c"/xxx/yyy/default_%m.profraw\00"
+
+// CHECK: store i8 0, ptr @__profc_bar, align 1
+// CHECK-NOT: @__profc_foo 
+
+//--- pgo-cold-func.prof
+foo:1:1
+ 1: 1
+
+//--- pgo-cold-func.c
+int bar(int x) { return x;}
+int foo(int x) { 
+    return x;
+}
diff --git a/clang/test/CodeGen/renderscript.c b/clang/test/CodeGen/renderscript.c
deleted file mode 100644
index 1629665c1ffb874..000000000000000
--- a/clang/test/CodeGen/renderscript.c
+++ /dev/null
@@ -1,140 +0,0 @@
-// RUN: %clang_cc1 %s -triple=renderscript32-none-linux-gnueabi -emit-llvm -o - -Werror | FileCheck %s -check-prefix=CHECK-RS32
-// RUN: %clang_cc1 %s -triple=renderscript64-none-linux-android -emit-llvm -o - -Werror | FileCheck %s -check-prefix=CHECK-RS64
-// RUN: %clang_cc1 %s -triple=armv7-none-linux-gnueabi -emit-llvm -o - -Werror | FileCheck %s -check-prefix=CHECK-ARM
-
-// Ensure that the bitcode has the correct triple
-// CHECK-RS32: target triple = "armv7-none-linux-gnueabi"
-// CHECK-RS64: target triple = "aarch64-none-linux-android"
-// CHECK-ARM: target triple = "armv7-none-linux-gnueabi"
-
-// Ensure that long data type has 8-byte size and alignment in RenderScript
-#ifdef __RENDERSCRIPT__
-#define LONG_WIDTH_AND_ALIGN 8
-#else
-#define LONG_WIDTH_AND_ALIGN 4
-#endif
-
-_Static_assert(sizeof(long) == LONG_WIDTH_AND_ALIGN, "sizeof long is wrong");
-_Static_assert(_Alignof(long) == LONG_WIDTH_AND_ALIGN, "sizeof long is wrong");
-
-// CHECK-RS32: i64 @test_long(i64 noundef %v)
-// CHECK-RS64: i64 @test_long(i64 noundef %v)
-// CHECK-ARM: i32 @test_long(i32 noundef %v)
-long test_long(long v) {
-  return v + 1;
-}
-
-// =============================================================================
-// Test coercion of aggregate argument or return value into integer arrays
-// =============================================================================
-
-// =============================================================================
-// aggregate parameter <= 4 bytes: coerced to [a x iNN] for both 32-bit and
-// 64-bit RenderScript
-// ==============================================================================
-
-typedef struct {char c1, c2, c3; } sChar3;
-typedef struct {short s; char c;} sShortChar;
-
-// CHECK-RS32: void @argChar3([3 x i8] %s.coerce)
-// CHECK-RS64: void @argChar3([3 x i8] %s.coerce)
-void argChar3(sChar3 s) {}
-
-// CHECK-RS32: void @argShortChar([2 x i16] %s.coerce)
-// CHECK-RS64: void @argShortChar([2 x i16] %s.coerce)
-void argShortChar(sShortChar s) {}
-
-// =============================================================================
-// aggregate return value <= 4 bytes: coerced to [a x iNN] for both 32-bit and
-// 64-bit RenderScript
-// =============================================================================
-
-// CHECK-RS32: [3 x i8] @retChar3()
-// CHECK-RS64: [3 x i8] @retChar3()
-sChar3 retChar3(void) { sChar3 r; return r; }
-
-// CHECK-RS32: [2 x i16] @retShortChar()
-// CHECK-RS64: [2 x i16] @retShortChar()
-sShortChar retShortChar(void) { sShortChar r; return r; }
-
-// =============================================================================
-// aggregate parameter <= 16 bytes: coerced to [a x iNN] for both 32-bit and
-// 64-bit RenderScript
-// =============================================================================
-
-typedef struct {short s1; char c; short s2; } sShortCharShort;
-typedef struct {int i; short s; char c; } sIntShortChar;
-typedef struct {long l; int i; } sLongInt;
-
-// CHECK-RS32: void @argShortCharShort([3 x i16] %s.coerce)
-// CHECK-RS64: void @argShortCharShort([3 x i16] %s.coerce)
-void argShortCharShort(sShortCharShort s) {}
-
-// CHECK-RS32: void @argIntShortChar([2 x i32] %s.coerce)
-// CHECK-RS64: void @argIntShortChar([2 x i32] %s.coerce)
-void argIntShortChar(sIntShortChar s) {}
-
-// CHECK-RS32: void @argLongInt([2 x i64] %s.coerce)
-// CHECK-RS64: void @argLongInt([2 x i64] %s.coerce)
-void argLongInt(sLongInt s) {}
-
-// =============================================================================
-// aggregate return value <= 16 bytes: returned on stack for 32-bit RenderScript
-// and coerced to [a x iNN] for 64-bit RenderScript
-// =============================================================================
-
-// CHECK-RS32: void @retShortCharShort(ptr dead_on_unwind noalias writable sret(%struct.sShortCharShort) align 2 %agg.result)
-// CHECK-RS64: [3 x i16] @retShortCharShort()
-sShortCharShort retShortCharShort(void) { sShortCharShort r; return r; }
-
-// CHECK-RS32: void @retIntShortChar(ptr dead_on_unwind noalias writable sret(%struct.sIntShortChar) align 4 %agg.result)
-// CHECK-RS64: [2 x i32] @retIntShortChar()
-sIntShortChar retIntShortChar(void) { sIntShortChar r; return r; }
-
-// CHECK-RS32: void @retLongInt(ptr dead_on_unwind noalias writable sret(%struct.sLongInt) align 8 %agg.result)
-// CHECK-RS64: [2 x i64] @retLongInt()
-sLongInt retLongInt(void) { sLongInt r; return r; }
-
-// =============================================================================
-// aggregate parameter <= 64 bytes: coerced to [a x iNN] for 32-bit RenderScript
-// and passed on the stack for 64-bit RenderScript
-// =============================================================================
-
-typedef struct {int i1, i2, i3, i4, i5; } sInt5;
-typedef struct {long l1, l2; char c; } sLong2Char;
-
-// CHECK-RS32: void @argInt5([5 x i32] %s.coerce)
-// CHECK-RS64: void @argInt5(ptr noundef %s)
-void argInt5(sInt5 s) {}
-
-// CHECK-RS32: void @argLong2Char([3 x i64] %s.coerce)
-// CHECK-RS64: void @argLong2Char(ptr noundef %s)
-void argLong2Char(sLong2Char s) {}
-
-// =============================================================================
-// aggregate return value <= 64 bytes: returned on stack for both 32-bit and
-// 64-bit RenderScript
-// =============================================================================
-
-// CHECK-RS32: void @retInt5(ptr dead_on_unwind noalias writable sret(%struct.sInt5) align 4 %agg.result)
-// CHECK-RS64: void @retInt5(ptr dead_on_unwind noalias writable sret(%struct.sInt5) align 4 %agg.result)
-sInt5 retInt5(void) { sInt5 r; return r;}
-
-// CHECK-RS32: void @retLong2Char(ptr dead_on_unwind noalias writable sret(%struct.sLong2Char) align 8 %agg.result)
-// CHECK-RS64: void @retLong2Char(ptr dead_on_unwind noalias writable sret(%struct.sLong2Char) align 8 %agg.result)
-sLong2Char retLong2Char(void) { sLong2Char r; return r;}
-
-// =============================================================================
-// aggregate parameters and return values > 64 bytes: passed and returned on the
-// stack for both 32-bit and 64-bit RenderScript
-// =============================================================================
-
-typedef struct {long l1, l2, l3, l4, l5, l6, l7, l8, l9; } sLong9;
-
-// CHECK-RS32: void @argLong9(ptr noundef byval(%struct.sLong9) align 8 %s)
-// CHECK-RS64: void @argLong9(ptr noundef %s)
-void argLong9(sLong9 s) {}
-
-// CHECK-RS32: void @retLong9(ptr dead_on_unwind noalias writable sret(%struct.sLong9) align 8 %agg.result)
-// CHECK-RS64: void @retLong9(ptr dead_on_unwind noalias writable sret(%struct.sLong9) align 8 %agg.result)
-sLong9 retLong9(void) { sLong9 r; return r; }
diff --git a/clang/test/CodeGen/rtsan_attribute_inserted.c b/clang/test/CodeGen/rtsan_attribute_inserted.c
index b21ecb6b6b06a90..cebfe43c81234cf 100644
--- a/clang/test/CodeGen/rtsan_attribute_inserted.c
+++ b/clang/test/CodeGen/rtsan_attribute_inserted.c
@@ -8,4 +8,4 @@ float process(float *a) [[clang::nonblocking]] { return *a; }
 int spinlock(int *a) [[clang::blocking]] { return *a; }
 // CHECK: @spinlock{{.*}} #1 {
 // CHECK: attributes #1 = {
-// CHECK-SAME: {{.*sanitize_realtime_unsafe .*}}
+// CHECK-SAME: {{.*sanitize_realtime_blocking .*}}
diff --git a/clang/test/CodeGen/rtsan_no_attribute_sanitizer_disabled.c b/clang/test/CodeGen/rtsan_no_attribute_sanitizer_disabled.c
index 0f43007c5e4c161..86305080c94acee 100644
--- a/clang/test/CodeGen/rtsan_no_attribute_sanitizer_disabled.c
+++ b/clang/test/CodeGen/rtsan_no_attribute_sanitizer_disabled.c
@@ -5,4 +5,4 @@ int spinlock(int *a) [[clang::blocking]] { return *a; }
 
 // Without the -fsanitize=realtime flag, we shouldn't attach the attributes.
 // CHECK-NOT: {{.*sanitize_realtime .*}}
-// CHECK-NOT: {{.*sanitize_realtime_unsafe .*}}
+// CHECK-NOT: {{.*sanitize_realtime_blocking .*}}
diff --git a/clang/test/CodeGenCUDA/offloading-entries.cu b/clang/test/CodeGenCUDA/offloading-entries.cu
index ec21f018607ff01..259e3324e8ac94f 100644
--- a/clang/test/CodeGenCUDA/offloading-entries.cu
+++ b/clang/test/CodeGenCUDA/offloading-entries.cu
@@ -15,48 +15,48 @@
 #include "Inputs/cuda.h"
 
 //.
-// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
+// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
-// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00"
+// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
-// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00"
+// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
-// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00"
+// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1
-// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00"
+// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
 // CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1
 //.
-// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
+// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
 // HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
-// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00"
+// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
 // HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
-// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00"
+// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
 // HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1
-// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00"
+// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
 // HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1
-// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00"
+// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
 // HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1
 //.
-// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
+// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
 // CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
-// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00"
+// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
 // CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
-// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00"
+// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
 // CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
-// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00"
+// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
 // CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1
-// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00"
+// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
 // CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1
 //.
-// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
+// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
 // HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00"
+// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
 // HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00"
+// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
 // HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00"
+// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
 // HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1
-// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00"
+// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
 // HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1
 //.
 // CUDA-LABEL: @_Z18__device_stub__foov(
@@ -137,3 +137,28 @@ template <typename T, int dim = 1, int mode = 0>
 struct __attribute__((device_builtin_texture_type)) texture : public textureReference {};
 
 texture<void> tex;
+//.
+// CUDA: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
+// CUDA: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
+// CUDA: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
+// CUDA: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
+// CUDA: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
+//.
+// HIP: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
+// HIP: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
+// HIP: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
+// HIP: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
+// HIP: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
+//.
+// CUDA-COFF: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
+// CUDA-COFF: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
+// CUDA-COFF: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
+// CUDA-COFF: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
+// CUDA-COFF: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
+//.
+// HIP-COFF: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
+// HIP-COFF: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
+// HIP-COFF: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
+// HIP-COFF: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
+// HIP-COFF: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
+//.
diff --git a/clang/test/CodeGenCXX/amdgpu-barrier-typeinfo.cpp b/clang/test/CodeGenCXX/amdgpu-barrier-typeinfo.cpp
new file mode 100644
index 000000000000000..a47f217dcd3db67
--- /dev/null
+++ b/clang/test/CodeGenCXX/amdgpu-barrier-typeinfo.cpp
@@ -0,0 +1,10 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn %s -emit-llvm -o - | FileCheck %s
+
+namespace std { class type_info; };
+
+auto &b0 = typeid(__amdgpu_named_workgroup_barrier_t);
+
+// CHECK-DAG: @_ZTSu34__amdgpu_named_workgroup_barrier_t = {{.*}} c"u34__amdgpu_named_workgroup_barrier_t\00"
+// CHECK-DAG: @_ZTIu34__amdgpu_named_workgroup_barrier_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu34__amdgpu_named_workgroup_barrier_t
+
diff --git a/clang/test/CodeGenHIP/amdgpu-barrier-type.hip b/clang/test/CodeGenHIP/amdgpu-barrier-type.hip
new file mode 100644
index 000000000000000..229e8b3c737c6aa
--- /dev/null
+++ b/clang/test/CodeGenHIP/amdgpu-barrier-type.hip
@@ -0,0 +1,42 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature
+ // REQUIRES: amdgpu-registered-target
+ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s
+
+#define __shared__ __attribute__((shared))
+
+__shared__ __amdgpu_named_workgroup_barrier_t bar;
+__shared__ __amdgpu_named_workgroup_barrier_t arr[2];
+__shared__ struct {
+  __amdgpu_named_workgroup_barrier_t x;
+  __amdgpu_named_workgroup_barrier_t y;
+} str;
+
+__amdgpu_named_workgroup_barrier_t *getBar();
+void useBar(__amdgpu_named_workgroup_barrier_t *);
+
+// CHECK-LABEL: define {{[^@]+}}@_Z7testSemPu34__amdgpu_named_workgroup_barrier_t
+// CHECK-SAME: (ptr noundef [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT:    [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_ADDR]] to ptr
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[P_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT:    call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef addrspacecast (ptr addrspace(1) @bar to ptr)) #[[ATTR2]]
+// CHECK-NEXT:    call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef getelementptr inbounds ([2 x target("amdgcn.named.barrier", 0)], ptr addrspacecast (ptr addrspace(1) @arr to ptr), i64 0, i64 1)) #[[ATTR2]]
+// CHECK-NEXT:    call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef getelementptr inbounds nuw ([[STRUCT_ANON:%.*]], ptr addrspacecast (ptr addrspace(1) @str to ptr), i32 0, i32 1)) #[[ATTR2]]
+// CHECK-NEXT:    [[CALL:%.*]] = call noundef ptr @_Z6getBarv() #[[ATTR2]]
+// CHECK-NEXT:    call void @_Z6useBarPu34__amdgpu_named_workgroup_barrier_t(ptr noundef [[CALL]]) #[[ATTR2]]
+// CHECK-NEXT:    [[CALL1:%.*]] = call noundef ptr @_Z6getBarv() #[[ATTR2]]
+// CHECK-NEXT:    ret ptr [[CALL1]]
+//
+__amdgpu_named_workgroup_barrier_t *testSem(__amdgpu_named_workgroup_barrier_t *p) {
+  useBar(p);
+  useBar(&bar);
+  useBar(&arr[1]);
+  useBar(&str.y);
+  useBar(getBar());
+  return getBar();
+}
diff --git a/clang/test/CodeGenHLSL/builtins/countbits.hlsl b/clang/test/CodeGenHLSL/builtins/countbits.hlsl
index 8dfe977bfae6269..218d8dcd10f8d70 100644
--- a/clang/test/CodeGenHLSL/builtins/countbits.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/countbits.hlsl
@@ -4,26 +4,37 @@
 
 #ifdef __HLSL_ENABLE_16_BIT
 // CHECK-LABEL: test_countbits_ushort
-// CHECK: call i16 @llvm.ctpop.i16
-uint16_t test_countbits_ushort(uint16_t p0)
+// CHECK: [[A:%.*]] = call i16 @llvm.ctpop.i16
+// CHECK-NEXT: zext i16 [[A]] to i32
+uint test_countbits_ushort(uint16_t p0)
+{
+	return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_short
+// CHECK: [[A:%.*]] = call i16 @llvm.ctpop.i16
+// CHECK-NEXT: sext i16 [[A]] to i32
+uint test_countbits_short(int16_t p0)
 {
 	return countbits(p0);
 }
 // CHECK-LABEL: test_countbits_ushort2
-// CHECK: call <2 x i16> @llvm.ctpop.v2i16
-uint16_t2 test_countbits_ushort2(uint16_t2 p0)
+// CHECK: [[A:%.*]] = call <2 x i16> @llvm.ctpop.v2i16
+// CHECK-NEXT: zext <2 x i16> [[A]] to <2 x i32>
+uint2 test_countbits_ushort2(uint16_t2 p0)
 {
 	return countbits(p0);
 }
 // CHECK-LABEL: test_countbits_ushort3
-// CHECK: call <3 x i16> @llvm.ctpop.v3i16
-uint16_t3 test_countbits_ushort3(uint16_t3 p0)
+// CHECK: [[A:%.*]] = call <3 x i16> @llvm.ctpop.v3i16
+// CHECK-NEXT: zext <3 x i16> [[A]] to <3 x i32>
+uint3 test_countbits_ushort3(uint16_t3 p0)
 {
 	return countbits(p0);
 }
 // CHECK-LABEL: test_countbits_ushort4
-// CHECK: call <4 x i16> @llvm.ctpop.v4i16
-uint16_t4 test_countbits_ushort4(uint16_t4 p0)
+// CHECK: [[A:%.*]] = call <4 x i16> @llvm.ctpop.v4i16
+// CHECK-NEXT: zext <4 x i16> [[A]] to <4 x i32>
+uint4 test_countbits_ushort4(uint16_t4 p0)
 {
 	return countbits(p0);
 }
@@ -31,7 +42,13 @@ uint16_t4 test_countbits_ushort4(uint16_t4 p0)
 
 // CHECK-LABEL: test_countbits_uint
 // CHECK: call i32 @llvm.ctpop.i32
-int test_countbits_uint(uint p0)
+uint test_countbits_uint(uint p0)
+{
+	return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_int
+// CHECK: call i32 @llvm.ctpop.i32
+uint test_countbits_int(int p0)
 {
 	return countbits(p0);
 }
@@ -55,26 +72,37 @@ uint4 test_countbits_uint4(uint4 p0)
 }
 
 // CHECK-LABEL: test_countbits_long
-// CHECK: call i64 @llvm.ctpop.i64
-uint64_t test_countbits_long(uint64_t p0)
+// CHECK: [[A:%.*]] = call i64 @llvm.ctpop.i64
+// CHECK-NEXT: trunc i64 [[A]] to i32
+uint test_countbits_long(uint64_t p0)
+{
+	return countbits(p0);
+}
+// CHECK-LABEL: test_countbits_slong
+// CHECK: [[A:%.*]] = call i64 @llvm.ctpop.i64
+// CHECK-NEXT: trunc i64 [[A]] to i32
+uint test_countbits_slong(int64_t p0)
 {
 	return countbits(p0);
 }
 // CHECK-LABEL: test_countbits_long2
-// CHECK: call <2 x i64> @llvm.ctpop.v2i64
-uint64_t2 test_countbits_long2(uint64_t2 p0)
+// CHECK: [[A:%.*]] = call <2 x i64> @llvm.ctpop.v2i64
+// CHECK-NEXT: trunc <2 x i64> [[A]] to <2 x i32>
+uint2 test_countbits_long2(uint64_t2 p0)
 {
 	return countbits(p0);
 }
 // CHECK-LABEL: test_countbits_long3
-// CHECK: call <3 x i64> @llvm.ctpop.v3i64
-uint64_t3 test_countbits_long3(uint64_t3 p0)
+// CHECK: [[A:%.*]] = call <3 x i64> @llvm.ctpop.v3i64
+// CHECK-NEXT: trunc <3 x i64> [[A]] to <3 x i32>
+uint3 test_countbits_long3(uint64_t3 p0)
 {
 	return countbits(p0);
 }
 // CHECK-LABEL: test_countbits_long4
-// CHECK: call <4 x i64> @llvm.ctpop.v4i64
-uint64_t4 test_countbits_long4(uint64_t4 p0)
+// CHECK: [[A:%.*]] = call <4 x i64> @llvm.ctpop.v4i64
+// CHECK-NEXT: trunc <4 x i64> [[A]] to <4 x i32>
+uint4 test_countbits_long4(uint64_t4 p0)
 {
 	return countbits(p0);
 }
diff --git a/clang/test/CodeGenHLSL/builtins/splitdouble.hlsl b/clang/test/CodeGenHLSL/builtins/splitdouble.hlsl
new file mode 100644
index 000000000000000..a883c9d5cc3555e
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/splitdouble.hlsl
@@ -0,0 +1,91 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.3-library %s -fnative-half-type -emit-llvm -O1 -o - | FileCheck %s
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple spirv-vulkan-library %s -fnative-half-type -emit-llvm -O0 -o - | FileCheck %s --check-prefix=SPIRV
+
+
+
+// CHECK: define {{.*}} i32 {{.*}}test_scalar{{.*}}(double {{.*}} [[VALD:%.*]])
+// CHECK:      [[VALRET:%.*]] = {{.*}} call { i32, i32 } @llvm.dx.splitdouble.i32(double [[VALD]])
+// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 0
+// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 1
+//
+// SPIRV: define spir_func {{.*}} i32 {{.*}}test_scalar{{.*}}(double {{.*}} [[VALD:%.*]])
+// SPIRV-NOT:  @llvm.dx.splitdouble.i32
+// SPIRV:      [[LOAD:%.*]] = load double, ptr [[VALD]].addr, align 8
+// SPIRV-NEXT: [[CAST:%.*]] = bitcast double [[LOAD]] to <2 x i32>
+// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 0
+// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 1
+uint test_scalar(double D) {
+  uint A, B;
+  asuint(D, A, B);
+  return A + B;
+}
+
+// CHECK: define {{.*}} <1 x i32> {{.*}}test_double1{{.*}}(<1 x double> {{.*}} [[VALD:%.*]])
+// CHECK:      [[TRUNC:%.*]] = extractelement <1 x double> [[VALD]], i64 0
+// CHECK-NEXT: [[VALRET:%.*]] = {{.*}} call { i32, i32 } @llvm.dx.splitdouble.i32(double [[TRUNC]])
+// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 0
+// CHECK-NEXT: extractvalue { i32, i32 } [[VALRET]], 1
+//
+// SPIRV: define spir_func {{.*}} <1 x i32> {{.*}}test_double1{{.*}}(<1 x double> {{.*}} [[VALD:%.*]])
+// SPIRV-NOT:  @llvm.dx.splitdouble.i32
+// SPIRV:      [[LOAD:%.*]] = load <1 x double>, ptr [[VALD]].addr, align 8
+// SPIRV-NEXT: [[TRUNC:%.*]] = extractelement <1 x double> [[LOAD]], i64 0
+// SPIRV-NEXT: [[CAST:%.*]] = bitcast double [[TRUNC]] to <2 x i32>
+// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 0
+// SPIRV-NEXT: extractelement <2 x i32> [[CAST]], i64 1
+uint1 test_double1(double1 D) {
+  uint A, B;
+  asuint(D, A, B);
+  return A + B;
+}
+
+// CHECK: define {{.*}} <2 x i32> {{.*}}test_vector2{{.*}}(<2 x double> {{.*}} [[VALD:%.*]])
+// CHECK:      [[VALRET:%.*]] = {{.*}} call { <2 x i32>, <2 x i32> } @llvm.dx.splitdouble.v2i32(<2 x double> [[VALD]])
+// CHECK-NEXT: extractvalue { <2 x i32>, <2 x i32> } [[VALRET]], 0
+// CHECK-NEXT: extractvalue { <2 x i32>, <2 x i32> } [[VALRET]], 1
+//
+// SPIRV: define spir_func {{.*}} <2 x i32> {{.*}}test_vector2{{.*}}(<2 x double> {{.*}} [[VALD:%.*]])
+// SPIRV-NOT:  @llvm.dx.splitdouble
+// SPIRV:      [[LOAD:%.*]] = load <2 x double>, ptr [[VALD]].addr, align 16
+// SPIRV-NEXT: [[CAST1:%.*]] = bitcast <2 x double> [[LOAD]] to <4 x i32>
+// SPIRV-NEXT: [[SHUF1:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+// SPIRV-NEXT: [[SHUF2:%.*]] = shufflevector <4 x i32> [[CAST1]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+uint2 test_vector2(double2 D) {
+  uint2 A, B;
+  asuint(D, A, B);
+  return A + B;
+}
+
+// CHECK: define {{.*}} <3 x i32> {{.*}}test_vector3{{.*}}(<3 x double> {{.*}} [[VALD:%.*]])
+// CHECK:      [[VALRET:%.*]] = {{.*}} call { <3 x i32>, <3 x i32> } @llvm.dx.splitdouble.v3i32(<3 x double> [[VALD]])
+// CHECK-NEXT: extractvalue { <3 x i32>, <3 x i32> } [[VALRET]], 0
+// CHECK-NEXT: extractvalue { <3 x i32>, <3 x i32> } [[VALRET]], 1
+//
+// SPIRV: define spir_func {{.*}} <3 x i32> {{.*}}test_vector3{{.*}}(<3 x double> {{.*}} [[VALD:%.*]])
+// SPIRV-NOT:  @llvm.dx.splitdouble
+// SPIRV:      [[LOAD:%.*]] = load <3 x double>, ptr [[VALD]].addr, align 32
+// SPIRV-NEXT: [[CAST1:%.*]] = bitcast <3 x double> [[LOAD]] to <6 x i32>
+// SPIRV-NEXT: [[SHUF1:%.*]] = shufflevector <6 x i32> [[CAST1]], <6 x i32> poison, <3 x i32> <i32 0, i32 2, i32 4>
+// SPIRV-NEXT: [[SHUF2:%.*]] = shufflevector <6 x i32> [[CAST1]], <6 x i32> poison, <3 x i32> <i32 1, i32 3, i32 5>
+uint3 test_vector3(double3 D) {
+  uint3 A, B;
+  asuint(D, A, B);
+  return A + B;
+}
+
+// CHECK: define {{.*}} <4 x i32> {{.*}}test_vector4{{.*}}(<4 x double> {{.*}} [[VALD:%.*]])
+// CHECK:      [[VALRET:%.*]] = {{.*}} call { <4 x i32>, <4 x i32> } @llvm.dx.splitdouble.v4i32(<4 x double> [[VALD]])
+// CHECK-NEXT: extractvalue { <4 x i32>, <4 x i32> } [[VALRET]], 0
+// CHECK-NEXT: extractvalue { <4 x i32>, <4 x i32> } [[VALRET]], 1
+//
+// SPIRV: define spir_func {{.*}} <4 x i32> {{.*}}test_vector4{{.*}}(<4 x double> {{.*}} [[VALD:%.*]])
+// SPIRV-NOT: @llvm.dx.splitdouble
+// SPIRV:      [[LOAD:%.*]] = load <4 x double>, ptr [[VALD]].addr, align 32
+// SPIRV-NEXT: [[CAST1:%.*]] = bitcast <4 x double> [[LOAD]] to <8 x i32>
+// SPIRV-NEXT: [[SHUF1:%.*]] = shufflevector <8 x i32> [[CAST1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+// SPIRV-NEXT: [[SHUF2:%.*]] = shufflevector <8 x i32> [[CAST1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+uint4 test_vector4(double4 D) {
+  uint4 A, B;
+  asuint(D, A, B);
+  return A + B;
+}
diff --git a/clang/test/CodeGenHLSL/convergence/entry.point.hlsl b/clang/test/CodeGenHLSL/convergence/entry.point.hlsl
new file mode 100644
index 000000000000000..337a9ad5026c161
--- /dev/null
+++ b/clang/test/CodeGenHLSL/convergence/entry.point.hlsl
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple spirv-pc-vulkan-compute -finclude-default-header -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
+
+// CHECK-LABEL: define void @main()
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[token:%[0-9]+]] = call token @llvm.experimental.convergence.entry()
+// CHECK-NEXT: call spir_func void @_Z4mainv() [ "convergencectrl"(token [[token]]) ]
+
+[numthreads(1,1,1)]
+void main() {
+}
+
diff --git a/clang/test/CodeGenOpenCLCXX/local_addrspace_init.clcpp b/clang/test/CodeGenOpenCLCXX/local_addrspace_init.clcpp
index bb9acf09d120be1..0a6462e4262413b 100644
--- a/clang/test/CodeGenOpenCLCXX/local_addrspace_init.clcpp
+++ b/clang/test/CodeGenOpenCLCXX/local_addrspace_init.clcpp
@@ -14,7 +14,7 @@ kernel void test() {
   // address space variables. User defined initialization could
   // make sense, but would it mean that all work items need to
   // execute it? Potentially disallowing any initialization would
-  // make things easier and assingments can be used to set specific
+  // make things easier and assignments can be used to set specific
   // values. This rules should make it consistent with OpenCL C.
   //__local C c();
 }
diff --git a/clang/test/Driver/XRay/xray-shared.cpp b/clang/test/Driver/XRay/xray-shared.cpp
index 215854e1fc7cefe..e331fefed1e0c9f 100644
--- a/clang/test/Driver/XRay/xray-shared.cpp
+++ b/clang/test/Driver/XRay/xray-shared.cpp
@@ -1,6 +1,5 @@
 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fPIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
 // RUN: %clang -### --target=x86_64-unknown-linux-gnu -fpic -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
-// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s
 // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-PIC -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC
 // RUN: not %clang -### --target=x86_64-unknown-linux-gnu -fno-pic -fxray-instrument -fxray-shared -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR-PIC
 
diff --git a/clang/test/Driver/amdgpu-toolchain.c b/clang/test/Driver/amdgpu-toolchain.c
index b60d31bae627009..c1c5aa8e90e6868 100644
--- a/clang/test/Driver/amdgpu-toolchain.c
+++ b/clang/test/Driver/amdgpu-toolchain.c
@@ -32,3 +32,7 @@
 // RUN: %clang -### --target=amdgcn-amd-amdhsa -mcpu=gfx906 -nogpulib \
 // RUN:   -r %s 2>&1 | FileCheck -check-prefixes=RELO %s
 // RELO-NOT: -shared
+
+// RUN: %clang -target amdgcn-amd-amdhsa -march=gfx90a -stdlib -startfiles \
+// RUN:   -nogpulib -nogpuinc -### %s 2>&1 | FileCheck -check-prefix=STARTUP %s
+// STARTUP: ld.lld{{.*}}"-lc" "-lm" "{{.*}}crt1.o"
diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c
index 126e9e9fc83d577..baf370483003155 100644
--- a/clang/test/Driver/cuda-cross-compiling.c
+++ b/clang/test/Driver/cuda-cross-compiling.c
@@ -105,3 +105,11 @@
 // RUN:   | FileCheck -check-prefix=FEATURE %s
 
 // FEATURE: clang-nvlink-wrapper{{.*}}"--plugin-opt=-mattr=+ptx63"
+
+//
+// Test including the libc startup files and libc
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_61 -stdlib -startfiles \
+// RUN:   -nogpulib -nogpuinc -### %s 2>&1 | FileCheck -check-prefix=STARTUP %s
+
+// STARTUP: clang-nvlink-wrapper{{.*}}"-lc" "-lm" "{{.*}}crt1.o"
diff --git a/clang/test/Driver/fprofile-generate-cold-function-coverage.c b/clang/test/Driver/fprofile-generate-cold-function-coverage.c
new file mode 100644
index 000000000000000..9b2f46423f34b1d
--- /dev/null
+++ b/clang/test/Driver/fprofile-generate-cold-function-coverage.c
@@ -0,0 +1,8 @@
+// RUN: %clang -### -c -fprofile-generate-cold-function-coverage %s 2>&1 | FileCheck %s
+// CHECK: "--instrument-cold-function-only-path=default_%m.profraw" 
+// CHECK: "--pgo-function-entry-coverage"
+// CHECK-NOT:  "-fprofile-instrument"
+// CHECK-NOT:  "-fprofile-instrument-path=
+
+// RUN: %clang -### -c -fprofile-generate-cold-function-coverage=dir %s 2>&1 | FileCheck %s --check-prefix=CHECK-EQ
+// CHECK-EQ: "--instrument-cold-function-only-path=dir{{/|\\\\}}default_%m.profraw" 
diff --git a/clang/test/Driver/linker-wrapper.c b/clang/test/Driver/linker-wrapper.c
index 068ea2d7d3c663c..470af4d5d70cac7 100644
--- a/clang/test/Driver/linker-wrapper.c
+++ b/clang/test/Driver/linker-wrapper.c
@@ -250,3 +250,7 @@ __attribute__((visibility("protected"), used)) int x;
 //       MLLVM-SAME: -Xlinker -mllvm=-pass-remarks=foo,bar
 //  OFFLOAD-OPT-NOT: -Xlinker -mllvm=-pass-remarks=foo,bar
 // OFFLOAD-OPT-SAME: {{$}}
+
+// Error handling when --linker-path is not provided for clang-linker-wrapper
+// RUN: not clang-linker-wrapper 2>&1 | FileCheck --check-prefix=LINKER-PATH-NOT-PROVIDED %s
+// LINKER-PATH-NOT-PROVIDED: linker path missing, must pass 'linker-path'
diff --git a/clang/test/Driver/nvlink-wrapper.c b/clang/test/Driver/nvlink-wrapper.c
index 2b0993caee42482..79f4a6641732f79 100644
--- a/clang/test/Driver/nvlink-wrapper.c
+++ b/clang/test/Driver/nvlink-wrapper.c
@@ -21,12 +21,13 @@ int bar() {
 }
 #else
 extern int y;
-int __attribute__((visibility("hidden"))) x = 999;
+extern int x;
 int baz() { return y + x; }
 #endif
 
 // Create various inputs to test basic linking and LTO capabilities. Creating a
 // CUDA binary requires access to the `ptxas` executable, so we just use x64.
+// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.o
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -DX -o %t-x.o
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -DY -o %t-y.o
 // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -DZ -o %t-z.o
@@ -36,6 +37,7 @@ int baz() { return y + x; }
 // RUN: llvm-ar rcs %t-y.a %t-y.o
 // RUN: llvm-ar rcs %t-z.a %t-z.o
 // RUN: llvm-ar rcs %t-w.a %t-w.o
+// RUN: llvm-ar rcs %t-u.a %t-u.o
 
 //
 // Check that we forward any unrecognized argument to 'nvlink'.
@@ -49,11 +51,16 @@ int baz() { return y + x; }
 // `libx.a` and `liby.a` because extern weak symbols do not extract and `libz.a`
 // is not used at all.
 //
-// RUN: clang-nvlink-wrapper --dry-run %t-x.a %t-u.o %t-y.a %t-z.a %t-w.a \
+// RUN: clang-nvlink-wrapper --dry-run %t-x.a %t-u.a %t-y.a %t-z.a %t-w.a %t.o \
 // RUN:   -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=LINK
 // LINK: nvlink{{.*}} -arch sm_52 -o a.out [[INPUT:.+]].cubin {{.*}}-x-{{.*}}.cubin{{.*}}-y-{{.*}}.cubin
 
-// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.o
+//
+// Same as above but we use '--undefined' to forcibly extract 'libz.a'
+//
+// RUN: clang-nvlink-wrapper --dry-run %t-x.a %t-u.a %t-y.a %t-z.a %t-w.a %t.o \
+// RUN:   -u z -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=UNDEFINED
+// UNDEFINED: nvlink{{.*}} -arch sm_52 -o a.out [[INPUT:.+]].cubin {{.*}}-x-{{.*}}.cubin{{.*}}-y-{{.*}}.cubin{{.*}}-z-{{.*}}.cubin
 
 //
 // Check that the LTO interface works and properly preserves symbols used in a
diff --git a/clang/test/Driver/print-supported-extensions-riscv.c b/clang/test/Driver/print-supported-extensions-riscv.c
index 65375b79cb680dc..68acde65a74bfb6 100644
--- a/clang/test/Driver/print-supported-extensions-riscv.c
+++ b/clang/test/Driver/print-supported-extensions-riscv.c
@@ -110,6 +110,7 @@
 // CHECK-NEXT:     zvl8192b             1.0       'Zvl' (Minimum Vector Length) 8192
 // CHECK-NEXT:     zhinx                1.0       'Zhinx' (Half Float in Integer)
 // CHECK-NEXT:     zhinxmin             1.0       'Zhinxmin' (Half Float in Integer Minimal)
+// CHECK-NEXT:     sha                  1.0       'Sha' (Augmented Hypervisor)
 // CHECK-NEXT:     shcounterenw         1.0       'Shcounterenw' (Support writeable hcounteren enable bit for any hpmcounter that is not read-only zero)
 // CHECK-NEXT:     shgatpa              1.0       'Sgatpa' (SvNNx4 mode supported for all modes supported by satp, as well as Bare)
 // CHECK-NEXT:     shtvala              1.0       'Shtvala' (htval provides all needed values)
@@ -120,6 +121,8 @@
 // CHECK-NEXT:     smcdeleg             1.0       'Smcdeleg' (Counter Delegation Machine Level)
 // CHECK-NEXT:     smcsrind             1.0       'Smcsrind' (Indirect CSR Access Machine Level)
 // CHECK-NEXT:     smepmp               1.0       'Smepmp' (Enhanced Physical Memory Protection)
+// CHECK-NEXT:     smmpm                1.0       'Smmpm' (Machine-level Pointer Masking for M-mode)
+// CHECK-NEXT:     smnpm                1.0       'Smnpm' (Machine-level Pointer Masking for next lower privilege mode)
 // CHECK-NEXT:     smrnmi               1.0       'Smrnmi' (Resumable Non-Maskable Interrupts)
 // CHECK-NEXT:     smstateen            1.0       'Smstateen' (Machine-mode view of the state-enable extension)
 // CHECK-NEXT:     ssaia                1.0       'Ssaia' (Advanced Interrupt Architecture Supervisor Level)
@@ -128,6 +131,8 @@
 // CHECK-NEXT:     sscofpmf             1.0       'Sscofpmf' (Count Overflow and Mode-Based Filtering)
 // CHECK-NEXT:     sscounterenw         1.0       'Sscounterenw' (Support writeable scounteren enable bit for any hpmcounter that is not read-only zero)
 // CHECK-NEXT:     sscsrind             1.0       'Sscsrind' (Indirect CSR Access Supervisor Level)
+// CHECK-NEXT:     ssnpm                1.0       'Ssnpm' (Supervisor-level Pointer Masking for next lower privilege mode)
+// CHECK-NEXT:     sspm                 1.0       'Sspm' (Indicates Supervisor-mode Pointer Masking)
 // CHECK-NEXT:     ssqosid              1.0       'Ssqosid' (Quality-of-Service (QoS) Identifiers)
 // CHECK-NEXT:     ssstateen            1.0       'Ssstateen' (Supervisor-mode view of the state-enable extension)
 // CHECK-NEXT:     ssstrict             1.0       'Ssstrict' (No non-conforming extensions are present)
@@ -135,12 +140,14 @@
 // CHECK-NEXT:     sstvala              1.0       'Sstvala' (stval provides all needed values)
 // CHECK-NEXT:     sstvecd              1.0       'Sstvecd' (stvec supports Direct mode)
 // CHECK-NEXT:     ssu64xl              1.0       'Ssu64xl' (UXLEN=64 supported)
+// CHECK-NEXT:     supm                 1.0       'Supm' (Indicates User-mode Pointer Masking)
 // CHECK-NEXT:     svade                1.0       'Svade' (Raise exceptions on improper A/D bits)
 // CHECK-NEXT:     svadu                1.0       'Svadu' (Hardware A/D updates)
 // CHECK-NEXT:     svbare               1.0       'Svbare' (satp mode Bare supported)
 // CHECK-NEXT:     svinval              1.0       'Svinval' (Fine-Grained Address-Translation Cache Invalidation)
 // CHECK-NEXT:     svnapot              1.0       'Svnapot' (NAPOT Translation Contiguity)
 // CHECK-NEXT:     svpbmt               1.0       'Svpbmt' (Page-Based Memory Types)
+// CHECK-NEXT:     svvptc               1.0       'svvptc' (Obviating Memory-Management Instructions after Marking PTEs Valid)
 // CHECK-NEXT:     xcvalu               1.0       'XCValu' (CORE-V ALU Operations)
 // CHECK-NEXT:     xcvbi                1.0       'XCVbi' (CORE-V Immediate Branching)
 // CHECK-NEXT:     xcvbitmanip          1.0       'XCVbitmanip' (CORE-V Bit Manipulation)
@@ -177,26 +184,21 @@
 // CHECK-NEXT:     zvbc32e              0.7       'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements)
 // CHECK-NEXT:     zvkgs                0.7       'Zvkgs' (Vector-Scalar GCM instructions for Cryptography)
 // CHECK-NEXT:     smctr                1.0       'Smctr' (Control Transfer Records Machine Level)
-// CHECK-NEXT:     smmpm                1.0       'Smmpm' (Machine-level Pointer Masking for M-mode)
-// CHECK-NEXT:     smnpm                1.0       'Smnpm' (Machine-level Pointer Masking for next lower privilege mode)
 // CHECK-NEXT:     ssctr                1.0       'Ssctr' (Control Transfer Records Supervisor Level)
-// CHECK-NEXT:     ssnpm                1.0       'Ssnpm' (Supervisor-level Pointer Masking for next lower privilege mode)
-// CHECK-NEXT:     sspm                 1.0       'Sspm' (Indicates Supervisor-mode Pointer Masking)
-// CHECK-NEXT:     supm                 1.0       'Supm' (Indicates User-mode Pointer Masking)
 // CHECK-EMPTY:
 // CHECK-NEXT: Supported Profiles
 // CHECK-NEXT:     rva20s64
 // CHECK-NEXT:     rva20u64
 // CHECK-NEXT:     rva22s64
 // CHECK-NEXT:     rva22u64
-// CHECK-NEXT:     rvi20u32
-// CHECK-NEXT:     rvi20u64
-// CHECK-EMPTY:
-// CHECK-NEXT: Experimental Profiles
 // CHECK-NEXT:     rva23s64
 // CHECK-NEXT:     rva23u64
 // CHECK-NEXT:     rvb23s64
 // CHECK-NEXT:     rvb23u64
+// CHECK-NEXT:     rvi20u32
+// CHECK-NEXT:     rvi20u64
+// CHECK-EMPTY:
+// CHECK-NEXT: Experimental Profiles
 // CHECK-NEXT:     rvm23u32
 // CHECK-EMPTY:
 // CHECK-NEXT: Use -march to specify the target's extension.
diff --git a/clang/test/Driver/ps5-linker.c b/clang/test/Driver/ps5-linker.c
index d18309a650726d8..2080f4dc91a7fb9 100644
--- a/clang/test/Driver/ps5-linker.c
+++ b/clang/test/Driver/ps5-linker.c
@@ -14,21 +14,32 @@
 // CHECK-NO-PIE-NOT: "-pie"
 // CHECK-SHARED: "--shared"
 
-// Test the driver passes PlayStation-specific -z options to the linker.
-
-// RUN: %clang --target=x86_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-Z %s
-
-// CHECK-Z: {{ld(\.exe)?}}"
-// CHECK-Z-SAME: "-z" "now"
-// CHECK-Z-SAME: "-z" "start-stop-visibility=hidden"
-// CHECK-Z-SAME: "-z" "dead-reloc-in-nonalloc=.debug_*=0xffffffffffffffff"
-// CHECK-Z-SAME: "-z" "dead-reloc-in-nonalloc=.debug_ranges=0xfffffffffffffffe"
-// CHECK-Z-SAME: "-z" "dead-reloc-in-nonalloc=.debug_loc=0xfffffffffffffffe"
-
-// RUN: %clang --target=x86_64-sie-ps5 -r %s -### 2>&1 | FileCheck --check-prefixes=CHECK-NO-Z %s
-
-// CHECK-NO-Z: {{ld(\.exe)?}}"
-// CHECK-NO-Z-NOT: "-z"
+// Test the driver passes PlayStation-specific options to the linker that are
+// appropriate for the type of output. Many options don't apply for relocatable
+// output (-r).
+
+// RUN: %clang --target=x86_64-sie-ps5 %s -### 2>&1 | FileCheck --check-prefixes=CHECK-EXE %s
+// RUN: %clang --target=x86_64-sie-ps5 %s -shared -### 2>&1 | FileCheck --check-prefixes=CHECK-EXE %s
+// RUN: %clang --target=x86_64-sie-ps5 %s -static -### 2>&1 | FileCheck --check-prefixes=CHECK-EXE %s
+// RUN: %clang --target=x86_64-sie-ps5 %s -r -### 2>&1 | FileCheck --check-prefixes=CHECK-NO-EXE %s
+
+// CHECK-EXE: {{ld(\.exe)?}}"
+// CHECK-EXE-SAME: "--eh-frame-hdr"
+// CHECK-EXE-SAME: "--hash-style=sysv"
+// CHECK-EXE-SAME: "--build-id=uuid"
+// CHECK-EXE-SAME: "--unresolved-symbols=report-all"
+// CHECK-EXE-SAME: "-z" "now"
+// CHECK-EXE-SAME: "-z" "start-stop-visibility=hidden"
+// CHECK-EXE-SAME: "-z" "dead-reloc-in-nonalloc=.debug_*=0xffffffffffffffff"
+// CHECK-EXE-SAME: "-z" "dead-reloc-in-nonalloc=.debug_ranges=0xfffffffffffffffe"
+// CHECK-EXE-SAME: "-z" "dead-reloc-in-nonalloc=.debug_loc=0xfffffffffffffffe"
+
+// CHECK-NO-EXE: {{ld(\.exe)?}}"
+// CHECK-NO-EXE-NOT: "--eh-frame-hdr"
+// CHECK-NO-EXE-NOT: "--hash-style
+// CHECK-NO-EXE-NOT: "--build-id
+// CHECK-NO-EXE-NOT: "--unresolved-symbols
+// CHECK-NO-EXE-NOT: "-z"
 
 // Test that -static is forwarded to the linker
 
diff --git a/clang/test/Driver/renderscript.rs b/clang/test/Driver/renderscript.rs
deleted file mode 100644
index 84f5dc4de777a2e..000000000000000
--- a/clang/test/Driver/renderscript.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-// RUN: %clang -### 2>&1 %s | FileCheck %s
-
-// CHECK: "-x" "renderscript"
diff --git a/clang/test/Driver/riscv-profiles.c b/clang/test/Driver/riscv-profiles.c
index 55aa5b398cee98d..67e09d0e69ebc3e 100644
--- a/clang/test/Driver/riscv-profiles.c
+++ b/clang/test/Driver/riscv-profiles.c
@@ -111,7 +111,7 @@
 // RVA22S64: "-target-feature" "+svinval"
 // RVA22S64: "-target-feature" "+svpbmt"
 
-// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva23u64 -menable-experimental-extensions \
+// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva23u64 \
 // RUN:   | FileCheck -check-prefix=RVA23U64 %s
 // RVA23U64: "-target-feature" "+m"
 // RVA23U64: "-target-feature" "+a"
@@ -147,8 +147,9 @@
 // RVA23U64: "-target-feature" "+zvbb"
 // RVA23U64: "-target-feature" "+zvfhmin"
 // RVA23U64: "-target-feature" "+zvkt"
+// RVA23U64: "-target-feature" "+supm"
 
-// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva23s64 -menable-experimental-extensions \
+// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva23s64 \
 // RUN:   | FileCheck -check-prefix=RVA23S64 %s
 // RVA23S64: "-target-feature" "+m"
 // RVA23S64: "-target-feature" "+a"
@@ -186,6 +187,7 @@
 // RVA23S64: "-target-feature" "+zvbb"
 // RVA23S64: "-target-feature" "+zvfhmin"
 // RVA23S64: "-target-feature" "+zvkt"
+// RVA23S64: "-target-feature" "+sha"
 // RVA23S64: "-target-feature" "+shcounterenw"
 // RVA23S64: "-target-feature" "+shgatpa"
 // RVA23S64: "-target-feature" "+shtvala"
@@ -195,19 +197,20 @@
 // RVA23S64: "-target-feature" "+ssccptr"
 // RVA23S64: "-target-feature" "+sscofpmf"
 // RVA23S64: "-target-feature" "+sscounterenw"
-// RVA23S64: "-target-feature" "+experimental-ssnpm"
+// RVA23S64: "-target-feature" "+ssnpm"
 // RVA23S64: "-target-feature" "+ssstateen"
 // RVA23S64: "-target-feature" "+sstc"
 // RVA23S64: "-target-feature" "+sstvala"
 // RVA23S64: "-target-feature" "+sstvecd"
 // RVA23S64: "-target-feature" "+ssu64xl"
+// RVA23S64: "-target-feature" "+supm"
 // RVA23S64: "-target-feature" "+svade"
 // RVA23S64: "-target-feature" "+svbare"
 // RVA23S64: "-target-feature" "+svinval"
 // RVA23S64: "-target-feature" "+svnapot"
 // RVA23S64: "-target-feature" "+svpbmt"
 
-// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rvb23u64 -menable-experimental-extensions \
+// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rvb23u64 \
 // RUN:   | FileCheck -check-prefix=RVB23U64 %s
 // RVB23U64: "-target-feature" "+m"
 // RVB23U64: "-target-feature" "+a"
@@ -239,7 +242,7 @@
 // RVB23U64: "-target-feature" "+zbs"
 // RVB23U64: "-target-feature" "+zkt"
 
-// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rvb23s64 -menable-experimental-extensions \
+// RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rvb23s64 \
 // RUN:   | FileCheck -check-prefix=RVB23S64 %s
 // RVB23S64: "-target-feature" "+m"
 // RVB23S64: "-target-feature" "+a"
@@ -323,6 +326,6 @@
 // RUN: not %clang --target=riscv64 -### -c %s 2>&1 -march=rva22u64zfa | FileCheck -check-prefix=INVALID-ADDITIONAL %s
 // INVALID-ADDITIONAL: error: invalid arch name 'rva22u64zfa', additional extensions must be after separator '_'
 
-// RUN: not %clang --target=riscv64 -### -c %s 2>&1 -march=rva23u64 | FileCheck -check-prefix=EXPERIMENTAL-NOFLAG %s
-// EXPERIMENTAL-NOFLAG: error: invalid arch name 'rva23u64'
-// EXPERIMENTAL-NOFLAG: requires '-menable-experimental-extensions' for profile 'rva23u64'
+// RUN: not %clang --target=riscv32 -### -c %s 2>&1 -march=rvm23u32 | FileCheck -check-prefix=EXPERIMENTAL-NOFLAG %s
+// EXPERIMENTAL-NOFLAG: error: invalid arch name 'rvm23u32'
+// EXPERIMENTAL-NOFLAG: requires '-menable-experimental-extensions' for profile 'rvm23u32'
diff --git a/clang/test/Driver/unknown-std.c b/clang/test/Driver/unknown-std.c
index 8f9047b2230adb5..332d587ddd4a17a 100644
--- a/clang/test/Driver/unknown-std.c
+++ b/clang/test/Driver/unknown-std.c
@@ -4,7 +4,6 @@
 
 // RUN: not %clang %s -std=foobar -c 2>&1 | FileCheck --match-full-lines %s
 // RUN: not %clang -x objective-c %s -std=foobar -c 2>&1 | FileCheck --match-full-lines %s
-// RUN: not %clang -x renderscript %s -std=foobar -c 2>&1 | FileCheck --match-full-lines %s
 
 // CHECK: error: invalid value 'foobar' in '-std=foobar'
 // CHECK-NEXT: note: use 'c89', 'c90', or 'iso9899:1990' for 'ISO C 1990' standard
diff --git a/clang/test/Format/error-unfound-files.cpp b/clang/test/Format/error-unfound-files.cpp
new file mode 100644
index 000000000000000..1cc57ed064fb429
--- /dev/null
+++ b/clang/test/Format/error-unfound-files.cpp
@@ -0,0 +1,5 @@
+// RUN: rm -f a.c b.c
+
+// RUN: not clang-format a.c b.c 2>&1 | FileCheck %s
+// CHECK: a.c:
+// CHECK-NEXT: b.c:
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 914f94c08a9fd98..e28b0775410c0a5 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -173,7 +173,6 @@
 // CHECK-NEXT: RandomizeLayout (SubjectMatchRule_record)
 // CHECK-NEXT: ReadOnlyPlacement (SubjectMatchRule_record)
 // CHECK-NEXT: ReleaseHandle (SubjectMatchRule_variable_is_parameter)
-// CHECK-NEXT: RenderScriptKernel (SubjectMatchRule_function)
 // CHECK-NEXT: ReqdWorkGroupSize (SubjectMatchRule_function)
 // CHECK-NEXT: Restrict (SubjectMatchRule_function)
 // CHECK-NEXT: ReturnTypestate (SubjectMatchRule_function, SubjectMatchRule_variable_is_parameter)
diff --git a/clang/test/Modules/no-external-type-id.cppm b/clang/test/Modules/no-external-type-id.cppm
index b8b987403812f23..d067e574e72e37b 100644
--- a/clang/test/Modules/no-external-type-id.cppm
+++ b/clang/test/Modules/no-external-type-id.cppm
@@ -23,7 +23,7 @@ export module b;
 import a;
 export int b();
 
-// CHECK: <DECL_FUNCTION {{.*}} op8=4104
+// CHECK: <DECL_FUNCTION {{.*}} op8=4120
 // CHECK: <TYPE_FUNCTION_PROTO
 
 //--- a.v1.cppm
diff --git a/clang/test/Modules/static-initializer.cppm b/clang/test/Modules/static-initializer.cppm
new file mode 100644
index 000000000000000..10d4854ee67fa62
--- /dev/null
+++ b/clang/test/Modules/static-initializer.cppm
@@ -0,0 +1,18 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm
+// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cpp -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/a.cpp
+
+//--- a.cppm
+export module a;
+int func();
+static int a = func();
+
+//--- a.cpp
+import a;
+
+// CHECK-NOT: internal global
+// CHECK-NOT: __cxx_global_var_init
+
diff --git a/clang/test/Preprocessor/predefined-macros-no-warnings.c b/clang/test/Preprocessor/predefined-macros-no-warnings.c
index d44b99a2b192a16..4e3e29ccfa8a83f 100644
--- a/clang/test/Preprocessor/predefined-macros-no-warnings.c
+++ b/clang/test/Preprocessor/predefined-macros-no-warnings.c
@@ -179,8 +179,6 @@
 // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple wasm64-wasi
 // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple wasm64-emscripten
 // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple dxil
-// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple renderscript32
-// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple renderscript64
 // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple ve
 // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple csky
 // RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple csky-linux
diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index 9e986f0143aefab..597325ffa5e4eef 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -20,6 +20,7 @@
 // CHECK-NOT: __riscv_m {{.*$}}
 // CHECK-NOT: __riscv_mul {{.*$}}
 // CHECK-NOT: __riscv_muldiv {{.*$}}
+// CHECK-NOT: __riscv_sha {{.*$}}
 // CHECK-NOT: __riscv_shcounterenw {{.*$}}
 // CHECK-NOT: __riscv_shgatpa {{.*$}}
 // CHECK-NOT: __riscv_shtvala {{.*$}}
@@ -51,6 +52,7 @@
 // CHECK-NOT: __riscv_svinval {{.*$}}
 // CHECK-NOT: __riscv_svnapot {{.*$}}
 // CHECK-NOT: __riscv_svpbmt {{.*$}}
+// CHECK-NOT: __riscv_svvptc {{.*$}}
 // CHECK-NOT: __riscv_v {{.*$}}
 // CHECK-NOT: __riscv_v_elen {{.*$}}
 // CHECK-NOT: __riscv_v_elen_fp {{.*$}}
@@ -323,6 +325,14 @@
 // CHECK-M-EXT: __riscv_mul 1
 // CHECK-M-EXT: __riscv_muldiv 1
 
+// RUN: %clang --target=riscv32-unknown-linux-gnu \
+// RUN:   -march=rv32isha -E -dM %s \
+// RUN:   -o - | FileCheck --check-prefix=CHECK-SHA-EXT %s
+// RUN: %clang --target=riscv64-unknown-linux-gnu \
+// RUN:   -march=rv64isha -E -dM %s \
+// RUN:   -o - | FileCheck --check-prefix=CHECK-SHA-EXT %s
+// CHECK-SHA-EXT: __riscv_sha 1000000{{$}}
+
 // RUN: %clang --target=riscv32-unknown-linux-gnu \
 // RUN:   -march=rv32ishcounterenw -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-SHCOUNTERENW-EXT %s
@@ -507,6 +517,14 @@
 // RUN:   -o - | FileCheck --check-prefix=CHECK-SVPBMT-EXT %s
 // CHECK-SVPBMT-EXT: __riscv_svpbmt 1000000{{$}}
 
+// RUN: %clang --target=riscv32-unknown-linux-gnu \
+// RUN:   -march=rv32isvvptc -E -dM %s \
+// RUN:   -o - | FileCheck --check-prefix=CHECK-SVVPTC-EXT %s
+// RUN: %clang --target=riscv64-unknown-linux-gnu \
+// RUN:   -march=rv64isvvptc -E -dM %s \
+// RUN:   -o - | FileCheck --check-prefix=CHECK-SVVPTC-EXT %s
+// CHECK-SVVPTC-EXT: __riscv_svvptc 1000000{{$}}
+
 // RUN: %clang --target=riscv32-unknown-linux-gnu \
 // RUN:   -march=rv32iv1p0 -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-V-EXT %s
@@ -1719,10 +1737,10 @@
 // RUN:   -o - | FileCheck --check-prefix=CHECK-ZICFISS-EXT %s
 // CHECK-ZICFISS-EXT: __riscv_zicfiss 1000000{{$}}
 
-// RUN: %clang --target=riscv32 -menable-experimental-extensions \
+// RUN: %clang --target=riscv32 \
 // RUN:   -march=rv32i_ssnpm1p0 -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-SSNPM-EXT %s
-// RUN: %clang --target=riscv64 -menable-experimental-extensions \
+// RUN: %clang --target=riscv64 \
 // RUN:   -march=rv64i_ssnpm1p0 -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-SSNPM-EXT %s
 // CHECK-SSNPM-EXT: __riscv_ssnpm 1000000{{$}}
@@ -1735,26 +1753,26 @@
 // RUN:   -o - | FileCheck --check-prefix=CHECK-SMNPM-EXT %s
 // CHECK-SMNPM-EXT: __riscv_smnpm 1000000{{$}}
 
-// RUN: %clang --target=riscv32 -menable-experimental-extensions \
+// RUN: %clang --target=riscv32 \
 // RUN:   -march=rv32i_smmpm1p0 -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-SMMPM-EXT %s
-// RUN: %clang --target=riscv64 -menable-experimental-extensions \
+// RUN: %clang --target=riscv64 \
 // RUN:   -march=rv64i_smmpm1p0 -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-SMMPM-EXT %s
 // CHECK-SMMPM-EXT: __riscv_smmpm 1000000{{$}}
 
-// RUN: %clang --target=riscv32 -menable-experimental-extensions \
+// RUN: %clang --target=riscv32 \
 // RUN:   -march=rv32i_sspm1p0 -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-SSPM-EXT %s
-// RUN: %clang --target=riscv64 -menable-experimental-extensions \
+// RUN: %clang --target=riscv64 \
 // RUN:   -march=rv64i_sspm1p0 -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-SSPM-EXT %s
 // CHECK-SSPM-EXT: __riscv_sspm 1000000{{$}}
 
-// RUN: %clang --target=riscv32 -menable-experimental-extensions \
+// RUN: %clang --target=riscv32 \
 // RUN:   -march=rv32i_supm1p0 -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-SUPM-EXT %s
-// RUN: %clang --target=riscv64 -menable-experimental-extensions \
+// RUN: %clang --target=riscv64 \
 // RUN:   -march=rv64i_supm1p0 -E -dM %s \
 // RUN:   -o - | FileCheck --check-prefix=CHECK-SUPM-EXT %s
 // CHECK-SUPM-EXT: __riscv_supm 1000000{{$}}
diff --git a/clang/test/Preprocessor/wasm-target-features.c b/clang/test/Preprocessor/wasm-target-features.c
index 14d2fbf4423d32b..71b7cf6a5d43cc1 100644
--- a/clang/test/Preprocessor/wasm-target-features.c
+++ b/clang/test/Preprocessor/wasm-target-features.c
@@ -163,8 +163,10 @@
 // RUN:     -target wasm64-unknown-unknown -mcpu=generic \
 // RUN:   | FileCheck %s -check-prefix=GENERIC-INCLUDE
 //
+// GENERIC-INCLUDE-DAG: #define __wasm_bulk_memory__ 1{{$}}
 // GENERIC-INCLUDE-DAG: #define __wasm_multivalue__ 1{{$}}
 // GENERIC-INCLUDE-DAG: #define __wasm_mutable_globals__ 1{{$}}
+// GENERIC-INCLUDE-DAG: #define __wasm_nontrapping_fptoint__ 1{{$}}
 // GENERIC-INCLUDE-DAG: #define __wasm_reference_types__ 1{{$}}
 // GENERIC-INCLUDE-DAG: #define __wasm_sign_ext__ 1{{$}}
 //
@@ -176,12 +178,10 @@
 // RUN:   | FileCheck %s -check-prefix=GENERIC
 //
 // GENERIC-NOT: #define __wasm_atomics__ 1{{$}}
-// GENERIC-NOT: #define __wasm_bulk_memory__ 1{{$}}
 // GENERIC-NOT: #define __wasm_exception_handling__ 1{{$}}
 // GENERIC-NOT: #define __wasm_extended_const__ 1{{$}}
 // GENERIC-NOT: #define __wasm__fp16__ 1{{$}}
 // GENERIC-NOT: #define __wasm_multimemory__ 1{{$}}
-// GENERIC-NOT: #define __wasm_nontrapping_fptoint__ 1{{$}}
 // GENERIC-NOT: #define __wasm_relaxed_simd__ 1{{$}}
 // GENERIC-NOT: #define __wasm_simd128__ 1{{$}}
 // GENERIC-NOT: #define __wasm_tail_call__ 1{{$}}
diff --git a/clang/test/Sema/arithmetic-fence-builtin.c b/clang/test/Sema/arithmetic-fence-builtin.c
index a1941970edb53c0..55867ffb5e012cd 100644
--- a/clang/test/Sema/arithmetic-fence-builtin.c
+++ b/clang/test/Sema/arithmetic-fence-builtin.c
@@ -1,8 +1,13 @@
 // RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -o - -verify -x c++ %s
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -o - -verify -x c++ %s -fexperimental-new-constant-interpreter
 // RUN: %clang_cc1 -triple ppc64le -DPPC     -emit-llvm -o - -verify -x c++ %s
+// RUN: %clang_cc1 -triple ppc64le -DPPC     -emit-llvm -o - -verify -x c++ %s -fexperimental-new-constant-interpreter
 // RUN: not %clang_cc1 -triple ppc64le -DPPC     -emit-llvm -o - -x c++ %s \
 // RUN:            -fprotect-parens 2>&1 | FileCheck -check-prefix=PPC %s
+// RUN: not %clang_cc1 -triple ppc64le -DPPC     -emit-llvm -o - -x c++ %s -fexperimental-new-constant-interpreter \
+// RUN:            -fprotect-parens 2>&1 | FileCheck -check-prefix=PPC %s
 // RUN: %clang_cc1 -triple spir64 -emit-llvm -o - -verify -x c++ %s
+// RUN: %clang_cc1 -triple spir64 -emit-llvm -o - -verify -x c++ %s -fexperimental-new-constant-interpreter
 #ifndef PPC
 int v;
 template <typename T> T addT(T a, T b) {
diff --git a/clang/test/Sema/arm-mfp8.c b/clang/test/Sema/arm-mfp8.c
new file mode 100644
index 000000000000000..e917078f50ed7e5
--- /dev/null
+++ b/clang/test/Sema/arm-mfp8.c
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64-arm-none-eabi -target-feature -fp8 %s
+
+// REQUIRES: aarch64-registered-target
+#include<arm_neon.h>
+__mfp8 test_cast_from_float(unsigned in) {
+  return (__mfp8)in; // expected-error {{used type '__mfp8' (aka '__MFloat8_t') where arithmetic or pointer type is required}}
+}
+
+unsigned test_cast_to_int(__mfp8 in) {
+  return (unsigned)in; // expected-error {{operand of type '__mfp8' (aka '__MFloat8_t') where arithmetic or pointer type is required}}
+}
diff --git a/clang/test/Sema/arm-mfp8.cpp b/clang/test/Sema/arm-mfp8.cpp
index f270168faceb328..e882c382522c223 100644
--- a/clang/test/Sema/arm-mfp8.cpp
+++ b/clang/test/Sema/arm-mfp8.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -verify=sve,neon -triple aarch64-arm-none-eabi \
+// RUN: %clang_cc1 -fsyntax-only -verify=sve,neon,scalar -triple aarch64-arm-none-eabi \
 // RUN: -target-feature -fp8 -target-feature +sve  -target-feature +neon %s
 
 // REQUIRES: aarch64-registered-target
@@ -29,3 +29,35 @@ void test_vector(mfloat8x8_t a, mfloat8x16_t b, uint8x8_t c) {
   c * b;  // neon-error {{cannot convert between vector and non-scalar values ('uint8x8_t' (vector of 8 'uint8_t' values) and 'mfloat8x16_t' (aka '__MFloat8x16_t'))}}
   c / b;  // neon-error {{cannot convert between vector and non-scalar values ('uint8x8_t' (vector of 8 'uint8_t' values) and 'mfloat8x16_t' (aka '__MFloat8x16_t'))}}
 }
+__mfp8 test_static_cast_from_char(char in) {
+  return static_cast<__mfp8>(in); // scalar-error {{static_cast from 'char' to '__mfp8' (aka '__MFloat8_t') is not allowed}}
+}
+
+char test_static_cast_to_char(__mfp8 in) {
+  return static_cast<char>(in); // scalar-error {{static_cast from '__mfp8' (aka '__MFloat8_t') to 'char' is not allowed}}
+}
+void test(bool b) {
+  __mfp8 mfp8;
+
+  mfp8 + mfp8;  // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and '__mfp8')}}
+  mfp8 - mfp8;  // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and '__mfp8')}}
+  mfp8 * mfp8;  // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and '__mfp8')}}
+  mfp8 / mfp8;  // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and '__mfp8')}}
+  ++mfp8;       // scalar-error {{cannot increment value of type '__mfp8' (aka '__MFloat8_t')}}
+  --mfp8;       // scalar-error {{cannot decrement value of type '__mfp8' (aka '__MFloat8_t')}}
+
+  char u8;
+
+  mfp8 + u8;   // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and 'char')}}
+  u8 + mfp8;   // scalar-error {{invalid operands to binary expression ('char' and '__mfp8' (aka '__MFloat8_t'))}}
+  mfp8 - u8;   // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and 'char')}}
+  u8 - mfp8;   // scalar-error {{invalid operands to binary expression ('char' and '__mfp8' (aka '__MFloat8_t'))}}
+  mfp8 * u8;   // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and 'char')}}
+  u8 * mfp8;   // scalar-error {{invalid operands to binary expression ('char' and '__mfp8' (aka '__MFloat8_t'))}}
+  mfp8 / u8;   // scalar-error {{invalid operands to binary expression ('__mfp8' (aka '__MFloat8_t') and 'char')}}
+  u8 / mfp8;   // scalar-error {{invalid operands to binary expression ('char' and '__mfp8' (aka '__MFloat8_t'))}}
+  mfp8 = u8;   // scalar-error {{assigning to '__mfp8' (aka '__MFloat8_t') from incompatible type 'char'}}
+  u8 = mfp8;   // scalar-error {{assigning to 'char' from incompatible type '__mfp8' (aka '__MFloat8_t')}}
+  mfp8 + (b ? u8 : mfp8);  // scalar-error {{incompatible operand types ('char' and '__mfp8' (aka '__MFloat8_t'))}}
+}
+
diff --git a/clang/test/Sema/asm.c b/clang/test/Sema/asm.c
index 6cd95c71604d443..28ef3ec6ce09c2a 100644
--- a/clang/test/Sema/asm.c
+++ b/clang/test/Sema/asm.c
@@ -204,6 +204,12 @@ double f_output_constraint(void) {
   return result;
 }
 
+double f_output_constraint_2(void) {
+  double result;
+  __asm("foo1": "+f" (result)); // expected-error {{invalid output constraint '+f' in asm}}
+  return result;
+}
+
 void fn1(void) {
   int l;
   __asm__(""
diff --git a/clang/test/Sema/attr-target-clones-aarch64.c b/clang/test/Sema/attr-target-clones-aarch64.c
index a723c5965c5bcde..e101fefd2b67c4b 100644
--- a/clang/test/Sema/attr-target-clones-aarch64.c
+++ b/clang/test/Sema/attr-target-clones-aarch64.c
@@ -7,7 +7,7 @@ void __attribute__((target_clones("default+sha3"))) warn1(void);
 
 // expected-error@+2 {{'target_clones' and 'target_version' attributes are not compatible}}
 // expected-note@+1 {{conflicting attribute is here}}
-void __attribute__((target_version("sve-bf16"), target_clones("sme+memtag"))) not_compat(void);
+void __attribute__((target_version("sve"), target_clones("sme+memtag"))) not_compat(void);
 
 int redecl(void);
 int __attribute__((target_clones("frintts", "simd+fp", "default"))) redecl(void) { return 1; }
@@ -78,4 +78,4 @@ int useage(void) {
 // expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}}
 int __attribute__((target_clones("sve2-sha3+ssbs", "sm4"))) mv_after_use(void) { return 1; }
 // expected-error@+1 {{'main' cannot be a multiversioned function}}
-int __attribute__((target_clones("sve-i8mm"))) main() { return 1; }
+int __attribute__((target_clones("i8mm"))) main() { return 1; }
diff --git a/clang/test/Sema/renderscript.rs b/clang/test/Sema/renderscript.rs
deleted file mode 100644
index 8fa0d4389e7f1f8..000000000000000
--- a/clang/test/Sema/renderscript.rs
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: %clang_cc1 -fsyntax-only -verify -x renderscript -D__RENDERSCRIPT__ %s
-// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fsyntax-only -verify -x c %s
-// REQUIRES: x86-registered-target
-
-#ifndef __RENDERSCRIPT__
-// expected-warning@+2 {{'kernel' attribute ignored}}
-#endif
-void __attribute__((kernel)) kernel(void) {}
-
-#ifndef __RENDERSCRIPT__
-// expected-warning@+4 {{'kernel' attribute ignored}}
-#else
-// expected-warning@+2 {{'kernel' attribute only applies to functions}}
-#endif
-int __attribute__((kernel)) global;
-
-#ifndef __RENDERSCRIPT__
-// expected-error@+2 {{function return value cannot have __fp16 type; did you forget * ?}}
-#endif
-__fp16 fp16_return(void);
-
-#ifndef __RENDERSCRIPT__
-// expected-error@+2 {{parameters cannot have __fp16 type; did you forget * ?}}
-#endif
-void fp16_arg(__fp16 p);
diff --git a/clang/test/SemaCXX/amdgpu-barrier.cpp b/clang/test/SemaCXX/amdgpu-barrier.cpp
new file mode 100644
index 000000000000000..a171433727dda41
--- /dev/null
+++ b/clang/test/SemaCXX/amdgpu-barrier.cpp
@@ -0,0 +1,17 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -verify -std=gnu++11 -triple amdgcn -Wno-unused-value %s
+
+void foo() {
+  int n = 100;
+  __amdgpu_named_workgroup_barrier_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_named_workgroup_barrier_t' with an rvalue of type 'int'}}
+  static_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}}
+  dynamic_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{invalid target type '__amdgpu_named_workgroup_barrier_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}}
+  reinterpret_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}}
+  int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_named_workgroup_barrier_t'}}
+  __amdgpu_named_workgroup_barrier_t k;
+  int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'int *'}}
+  void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'void *'}}
+}
+
+static_assert(sizeof(__amdgpu_named_workgroup_barrier_t) == 16, "wrong size");
+static_assert(alignof(__amdgpu_named_workgroup_barrier_t) == 4, "wrong alignment");
diff --git a/clang/test/SemaCXX/attr-lifetimebound.cpp b/clang/test/SemaCXX/attr-lifetimebound.cpp
index 1c5c79777c71c8d..804d61fb62ca402 100644
--- a/clang/test/SemaCXX/attr-lifetimebound.cpp
+++ b/clang/test/SemaCXX/attr-lifetimebound.cpp
@@ -1,8 +1,7 @@
 // RUN: %clang_cc1 -std=c++23 -verify %s
 
 namespace usage_invalid {
-  // FIXME: Should we diagnose a void return type?
-  void voidreturn(int &param [[clang::lifetimebound]]);
+  void void_return(int &param [[clang::lifetimebound]]); // expected-error {{'lifetimebound' attribute cannot be applied to a parameter of a function that returns void}}
 
   int *not_class_member() [[clang::lifetimebound]]; // expected-error {{non-member function has no implicit object parameter}}
   struct A {
@@ -12,6 +11,8 @@ namespace usage_invalid {
     int *explicit_object(this A&) [[clang::lifetimebound]]; // expected-error {{explicit object member function has no implicit object parameter}}
     int not_function [[clang::lifetimebound]]; // expected-error {{only applies to parameters and implicit object parameters}}
     int [[clang::lifetimebound]] also_not_function; // expected-error {{cannot be applied to types}}
+    // FIXME: Should diagnose a void return type.
+    void void_return_member() [[clang::lifetimebound]];
   };
   int *attr_with_param(int &param [[clang::lifetimebound(42)]]); // expected-error {{takes no arguments}}
 }
@@ -31,6 +32,13 @@ namespace usage_ok {
     return *(int*)param;
   }
 
+  template <class T, class R = void> R dependent_void(const T& t [[clang::lifetimebound]]);
+  void dependent_void_instantiation() {
+    dependent_void<int>(1); // OK: Returns void.
+    int x = dependent_void<int, int>(1); // expected-warning {{temporary whose address is used as value of local variable 'x' will be destroyed at the end of the full-expression}}
+    dependent_void<int, int>(1); // OK: Returns an unused value.
+  }
+
   struct A {
     A();
     A(int);
diff --git a/clang/test/SemaCXX/attr-target-version.cpp b/clang/test/SemaCXX/attr-target-version.cpp
index 2c85f9735a87b16..c0a645713b21871 100644
--- a/clang/test/SemaCXX/attr-target-version.cpp
+++ b/clang/test/SemaCXX/attr-target-version.cpp
@@ -49,7 +49,7 @@ double __attribute__((target_version("rcpc"))) diff_type1(void);
 
 auto __attribute__((target_version("rcpc2"))) diff_type2(void) -> int { return 1; }
 //expected-error@+1 {{multiversioned function declaration has a different return type}}
-auto __attribute__((target_version("sve-bf16"))) diff_type2(void) -> long { return (long)1; }
+auto __attribute__((target_version("bf16"))) diff_type2(void) -> long { return (long)1; }
 
 int __attribute__((target_version("fp16fml"))) diff_type3(void) noexcept(false) { return 1; }
 //expected-error@+2 {{exception specification in declaration does not match previous declaration}}
@@ -75,7 +75,7 @@ auto __attribute__((target_version("dpb2"))) ret3(void) -> int { return 1; }
 class Cls {
   __attribute__((target_version("rng"))) Cls();
   // expected-error@-1 {{attribute 'target_version' multiversioned functions do not yet support constructors}}
-  __attribute__((target_version("sve-i8mm"))) ~Cls();
+  __attribute__((target_version("i8mm"))) ~Cls();
   // expected-error@-1 {{attribute 'target_version' multiversioned functions do not yet support destructors}}
 
   Cls &__attribute__((target_version("f32mm"))) operator=(const Cls &) = default;
diff --git a/clang/test/SemaCXX/conditional-expr.cpp b/clang/test/SemaCXX/conditional-expr.cpp
index 01effaa189322bc..8f17555fd806ff8 100644
--- a/clang/test/SemaCXX/conditional-expr.cpp
+++ b/clang/test/SemaCXX/conditional-expr.cpp
@@ -429,3 +429,10 @@ void g() {
   long e = a = b ? throw 0 : throw 1;
 }
 } // namespace PR46484
+
+namespace GH111854 {
+void f() {
+  (true ? throw 0 : 0) <= 0;  // expected-warning {{relational comparison result unused}}
+  (false ? 0 : throw 0) <= 0; // expected-warning {{relational comparison result unused}}
+}
+}
diff --git a/clang/test/SemaCXX/gh113468.cpp b/clang/test/SemaCXX/gh113468.cpp
new file mode 100644
index 000000000000000..94551986b0efaab
--- /dev/null
+++ b/clang/test/SemaCXX/gh113468.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s
+
+constexpr int expr() {
+  if (({
+        int f;
+        f = 0;
+        if (f)
+          break; // expected-error {{'break' statement not in loop or switch statement}}
+      }))
+    return 2;
+  return 1;
+}
diff --git a/clang/test/SemaCXX/warn-memaccess.cpp b/clang/test/SemaCXX/warn-memaccess.cpp
new file mode 100644
index 000000000000000..070b44891a91aa4
--- /dev/null
+++ b/clang/test/SemaCXX/warn-memaccess.cpp
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 -Wnontrivial-memaccess %s
+
+extern "C" void *bzero(void *, unsigned);
+extern "C" void *memset(void *, int, unsigned);
+extern "C" void *memmove(void *s1, const void *s2, unsigned n);
+extern "C" void *memcpy(void *s1, const void *s2, unsigned n);
+
+class TriviallyCopyable {};
+class NonTriviallyCopyable { NonTriviallyCopyable(const NonTriviallyCopyable&);};
+struct Incomplete;
+
+void test_bzero(TriviallyCopyable* tc,
+                NonTriviallyCopyable *ntc,
+                Incomplete* i) {
+  // OK
+  bzero(tc, sizeof(*tc));
+
+  // OK
+  bzero(i, 10);
+
+  // expected-warning@+2{{first argument in call to 'bzero' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}}
+  // expected-note@+1{{explicitly cast the pointer to silence this warning}}
+  bzero(ntc, sizeof(*ntc));
+
+  // OK
+  bzero((void*)ntc, sizeof(*ntc));
+}
+
+void test_memset(TriviallyCopyable* tc,
+                 NonTriviallyCopyable *ntc,
+                 Incomplete* i) {
+  // OK
+  memset(tc, 0, sizeof(*tc));
+
+  // OK
+  memset(i, 0, 10);
+
+  // expected-warning@+2{{first argument in call to 'memset' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}}
+  // expected-note@+1{{explicitly cast the pointer to silence this warning}}
+  memset(ntc, 0, sizeof(*ntc));
+
+  // OK
+  memset((void*)ntc, 0, sizeof(*ntc));
+}
+
+
+void test_memcpy(TriviallyCopyable* tc0, TriviallyCopyable* tc1,
+                 NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1,
+                 Incomplete *i0, Incomplete *i1) {
+  // OK
+  memcpy(tc0, tc1, sizeof(*tc0));
+
+  // OK
+  memcpy(i0, i1, 10);
+
+  // expected-warning@+2{{first argument in call to 'memcpy' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}}
+  // expected-note@+1{{explicitly cast the pointer to silence this warning}}
+  memcpy(ntc0, ntc1, sizeof(*ntc0));
+
+  // ~ OK: no diagnostic is expected when only the destination is cast to void*.
+  memcpy((void*)ntc0, ntc1, sizeof(*ntc0));
+
+  // OK
+  memcpy((void*)ntc0, (void*)ntc1, sizeof(*ntc0));
+}
+
+void test_memmove(TriviallyCopyable* tc0, TriviallyCopyable* tc1,
+                  NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1,
+                  Incomplete *i0, Incomplete *i1) {
+  // OK
+  memmove(tc0, tc1, sizeof(*tc0));
+
+  // OK
+  memmove(i0, i1, 10);
+
+  // expected-warning@+2{{first argument in call to 'memmove' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}}
+  // expected-note@+1{{explicitly cast the pointer to silence this warning}}
+  memmove(ntc0, ntc1, sizeof(*ntc0));
+
+  // ~ OK: no diagnostic is expected when only the destination is cast to void*.
+  memmove((void*)ntc0, ntc1, sizeof(*ntc0));
+
+  // OK
+  memmove((void*)ntc0, (void*)ntc1, sizeof(*ntc0));
+}
diff --git a/clang/test/SemaHIP/amdgpu-barrier.hip b/clang/test/SemaHIP/amdgpu-barrier.hip
new file mode 100644
index 000000000000000..ccd99b1e2c1f261
--- /dev/null
+++ b/clang/test/SemaHIP/amdgpu-barrier.hip
@@ -0,0 +1,20 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -fsyntax-only -verify -triple amdgcn -Wno-unused-value %s
+// RUN: %clang_cc1 -fsyntax-only -verify -triple x86_64 -aux-triple amdgcn -Wno-unused-value %s
+
+#define __device__ __attribute__((device))
+
+__device__ void foo() {
+  int n = 100;
+  __amdgpu_named_workgroup_barrier_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_named_workgroup_barrier_t' with an rvalue of type 'int'}}
+  static_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}}
+  dynamic_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{invalid target type '__amdgpu_named_workgroup_barrier_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}}
+  reinterpret_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}}
+  int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_named_workgroup_barrier_t'}}
+  __amdgpu_named_workgroup_barrier_t k;
+  int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'int *'}}
+  void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'void *'}}
+}
+
+static_assert(sizeof(__amdgpu_named_workgroup_barrier_t) == 16, "wrong size");
+static_assert(alignof(__amdgpu_named_workgroup_barrier_t) == 4, "wrong alignment");
diff --git a/clang/test/SemaHLSL/BuiltIns/asuint-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/asuint-errors.hlsl
index 8c56fdddb1c24c5..4adb0555c35be60 100644
--- a/clang/test/SemaHLSL/BuiltIns/asuint-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/asuint-errors.hlsl
@@ -6,6 +6,10 @@ uint4 test_asuint_too_many_arg(float p0, float p1) {
   // expected-error@-1 {{no matching function for call to 'asuint'}}
   // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'V', but 2 arguments were provided}}
   // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function template not viable: requires single argument 'F', but 2 arguments were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 2 were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 2 were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 2 were provided}}
+  // expected-note@hlsl/hlsl_intrinsics.h:* {{candidate function not viable: requires 3 arguments, but 2 were provided}}
 }
 
 uint test_asuint_double(double p1) {
@@ -23,3 +27,29 @@ uint test_asuint_half(half p1) {
     // expected-note@hlsl/hlsl_detail.h:* {{candidate template ignored: could not match 'vector<half, N>' against 'half'}}
     // expected-note@hlsl/hlsl_detail.h:* {{candidate template ignored: substitution failure [with U = uint, T = half]: no type named 'Type'}}
 }
+
+void test_asuint_first_arg_const(double D) {
+  const uint A = 0;
+  uint B;
+  asuint(D, A, B);
+ // expected-error@hlsl/hlsl_intrinsics.h:* {{read-only variable is not assignable}} 
+}
+
+void test_asuint_second_arg_const(double D) {
+  const uint A = 0;
+  uint B;
+  asuint(D, B, A);
+ // expected-error@hlsl/hlsl_intrinsics.h:* {{read-only variable is not assignable}} 
+}
+
+void test_asuint_imidiate_value(double D) {
+  uint B;
+  asuint(D, B, 1);
+ // expected-error@-1 {{cannot bind non-lvalue argument 1 to out paramemter}} 
+}
+
+void test_asuint_expr(double D) {
+  uint B;
+  asuint(D, B, B + 1);
+ // expected-error@-1 {{cannot bind non-lvalue argument B + 1 to out paramemter}} 
+}
diff --git a/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl
index 8d5f0abb2860f8e..5704165e1a45053 100644
--- a/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/countbits-errors.hlsl
@@ -1,6 +1,4 @@
-// RUN: %clang_cc1 -finclude-default-header
-// -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only
-// -disable-llvm-passes -verify -verify-ignore-unexpected
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm-only -disable-llvm-passes -verify -verify-ignore-unexpected
 
 
 double test_int_builtin(double p0) {
@@ -9,13 +7,11 @@ double test_int_builtin(double p0) {
 }
 
 double2 test_int_builtin_2(double2 p0) {
-  return __builtin_elementwise_popcount(p0);
-  // expected-error@-1 {{1st argument must be a vector of integers
-  // (was 'double2' (aka 'vector<double, 2>'))}}
+  return countbits(p0);
+  // expected-error@-1 {{call to 'countbits' is ambiguous}}
 }
 
 double test_int_builtin_3(float p0) {
-  return __builtin_elementwise_popcount(p0);
-  // expected-error@-1 {{1st argument must be a vector of integers
-  // (was 'float')}}
+  return countbits(p0);
+  // expected-error@-1 {{call to 'countbits' is ambiguous}}
 }
diff --git a/clang/test/SemaHLSL/BuiltIns/splitdouble-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/splitdouble-errors.hlsl
new file mode 100644
index 000000000000000..18d2b692b335b9b
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/splitdouble-errors.hlsl
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -verify
+
+void test_no_second_arg(double D) {
+  __builtin_hlsl_elementwise_splitdouble(D);
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 1}} 
+}
+
+void test_no_third_arg(double D) {
+  uint A;
+  __builtin_hlsl_elementwise_splitdouble(D, A);
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 2}} 
+}
+
+void test_too_many_arg(double D) {
+  uint A, B, C;
+  __builtin_hlsl_elementwise_splitdouble(D, A, B, C);
+ // expected-error@-1 {{too many arguments to function call, expected 3, have 4}} 
+}
+
+void test_first_arg_type_mismatch(bool3 D) {
+  uint3 A, B;
+  __builtin_hlsl_elementwise_splitdouble(D, A, B);
+ // expected-error@-1 {{invalid operand of type 'bool3' (aka 'vector<bool, 3>') where 'double' or a vector of such type is required}} 
+}
+
+void test_second_arg_type_mismatch(double D) {
+  bool A;
+  uint B;
+  __builtin_hlsl_elementwise_splitdouble(D, A, B);
+ // expected-error@-1 {{invalid operand of type 'bool' where 'unsigned int' or a vector of such type is required}} 
+}
+
+void test_third_arg_type_mismatch(double D) {
+  bool A;
+  uint B;
+  __builtin_hlsl_elementwise_splitdouble(D, B, A);
+ // expected-error@-1 {{invalid operand of type 'bool' where 'unsigned int' or a vector of such type is required}} 
+}
+
+void test_const_second_arg(double D) {
+  const uint A = 1;
+  uint B;
+  __builtin_hlsl_elementwise_splitdouble(D, A, B);
+ // expected-error@-1 {{cannot bind non-lvalue argument A to out paramemter}} 
+}
+
+void test_const_third_arg(double D) {
+  uint A;
+  const uint B = 1;
+  __builtin_hlsl_elementwise_splitdouble(D, A, B);
+ // expected-error@-1 {{cannot bind non-lvalue argument B to out paramemter}} 
+}
+
+void test_number_second_arg(double D) {
+  uint B;
+  __builtin_hlsl_elementwise_splitdouble(D, (uint)1, B);
+ // expected-error@-1 {{cannot bind non-lvalue argument (uint)1 to out paramemter}} 
+}
+
+void test_number_third_arg(double D) {
+  uint B;
+  __builtin_hlsl_elementwise_splitdouble(D, B, (uint)1);
+ // expected-error@-1 {{cannot bind non-lvalue argument (uint)1 to out paramemter}} 
+}
+
+void test_expr_second_arg(double D) {
+  uint B;
+  __builtin_hlsl_elementwise_splitdouble(D, B+1, B);
+ // expected-error@-1 {{cannot bind non-lvalue argument B + 1 to out paramemter}} 
+}
+
+void test_expr_third_arg(double D) {
+  uint B;
+  __builtin_hlsl_elementwise_splitdouble(D, B, B+1);
+ // expected-error@-1 {{cannot bind non-lvalue argument B + 1 to out paramemter}} 
+}
diff --git a/clang/test/SemaOpenCL/amdgpu-barrier.cl b/clang/test/SemaOpenCL/amdgpu-barrier.cl
new file mode 100644
index 000000000000000..150c311c7c59303
--- /dev/null
+++ b/clang/test/SemaOpenCL/amdgpu-barrier.cl
@@ -0,0 +1,12 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -Wno-unused-value %s
+// RUN: %clang_cc1 -verify -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -Wno-unused-value %s
+
+void foo() {
+    int n = 100;
+    __amdgpu_named_workgroup_barrier_t v = 0; // expected-error {{initializing '__private __amdgpu_named_workgroup_barrier_t' with an expression of incompatible type 'int'}}
+    int c = v; // expected-error {{initializing '__private int' with an expression of incompatible type '__private __amdgpu_named_workgroup_barrier_t'}}
+    __amdgpu_named_workgroup_barrier_t k;
+    int *ip = (int *)k; // expected-error {{operand of type '__amdgpu_named_workgroup_barrier_t' where arithmetic or pointer type is required}}
+    void *vp = (void *)k; // expected-error {{operand of type '__amdgpu_named_workgroup_barrier_t' where arithmetic or pointer type is required}}
+ }
diff --git a/clang/test/SemaOpenMP/amdgpu-barrier.cpp b/clang/test/SemaOpenMP/amdgpu-barrier.cpp
new file mode 100644
index 000000000000000..70aaefd080885e6
--- /dev/null
+++ b/clang/test/SemaOpenMP/amdgpu-barrier.cpp
@@ -0,0 +1,17 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -triple amdgcn-amd-amdhsa -fopenmp-is-target-device -Wno-unused-value %s
+
+void foo() {
+#pragma omp target
+  {
+    int n = 100;
+    __amdgpu_named_workgroup_barrier_t v = 0; // expected-error {{cannot initialize a variable of type '__amdgpu_named_workgroup_barrier_t' with an rvalue of type 'int'}}
+    static_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{static_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}}
+    dynamic_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{invalid target type '__amdgpu_named_workgroup_barrier_t' for dynamic_cast; target type must be a reference or pointer type to a defined class}}
+    reinterpret_cast<__amdgpu_named_workgroup_barrier_t>(n); // expected-error {{reinterpret_cast from 'int' to '__amdgpu_named_workgroup_barrier_t' is not allowed}}
+    int c(v); // expected-error {{cannot initialize a variable of type 'int' with an lvalue of type '__amdgpu_named_workgroup_barrier_t'}}
+    __amdgpu_named_workgroup_barrier_t k;
+    int *ip = (int *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'int *'}}
+    void *vp = (void *)k; // expected-error {{cannot cast from type '__amdgpu_named_workgroup_barrier_t' to pointer type 'void *'}}
+  }
+ }
diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp
index 96fb85e99bf5f0f..cc735e48725921c 100644
--- a/clang/tools/clang-format/ClangFormat.cpp
+++ b/clang/tools/clang-format/ClangFormat.cpp
@@ -410,7 +410,7 @@ static bool format(StringRef FileName, bool ErrorOnIncompleteFormat = false) {
   const bool IsSTDIN = FileName == "-";
   if (!OutputXML && Inplace && IsSTDIN) {
     errs() << "error: cannot use -i when reading from stdin.\n";
-    return false;
+    return true;
   }
   // On Windows, overwriting a file with an open file mapping doesn't work,
   // so read the whole file into memory when formatting in-place.
@@ -419,7 +419,7 @@ static bool format(StringRef FileName, bool ErrorOnIncompleteFormat = false) {
           ? MemoryBuffer::getFileAsStream(FileName)
           : MemoryBuffer::getFileOrSTDIN(FileName, /*IsText=*/true);
   if (std::error_code EC = CodeOrErr.getError()) {
-    errs() << EC.message() << "\n";
+    errs() << FileName << ": " << EC.message() << "\n";
     return true;
   }
   std::unique_ptr<llvm::MemoryBuffer> Code = std::move(CodeOrErr.get());
@@ -510,7 +510,7 @@ static bool format(StringRef FileName, bool ErrorOnIncompleteFormat = false) {
       reformat(*FormatStyle, *ChangedCode, Ranges, AssumedFileName, &Status);
   Replaces = Replaces.merge(FormatChanges);
   if (DryRun) {
-    return Replaces.size() > (IsJson ? 1 : 0) &&
+    return Replaces.size() > (IsJson ? 1u : 0u) &&
            emitReplacementWarnings(Replaces, AssumedFileName, Code);
   }
   if (OutputXML) {
diff --git a/clang/tools/clang-format/git-clang-format.bat b/clang/tools/clang-format/git-clang-format.bat
index 19c82d8a04132b6..a40276e63c5848b 100644
--- a/clang/tools/clang-format/git-clang-format.bat
+++ b/clang/tools/clang-format/git-clang-format.bat
@@ -1 +1 @@
-py -3 "%~pn0" %*
+py -3 "%~dpn0" %*
diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
index 9fea1fdcd5fb466..561b73c73ad7df9 100644
--- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
+++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
@@ -140,9 +140,6 @@ static std::list<SmallString<128>> TempFiles;
 /// Codegen flags for LTO backend.
 static codegen::RegisterCodeGenFlags CodeGenFlags;
 
-/// Global flag to indicate that the LTO pipeline threw an error.
-static std::atomic<bool> LTOError;
-
 using OffloadingImage = OffloadBinary::OffloadingImage;
 
 namespace llvm {
@@ -293,12 +290,10 @@ Expected<std::string> findProgram(StringRef Name, ArrayRef<StringRef> Paths) {
   return *Path;
 }
 
-/// We will defer LTO to the target's linker if we are not doing JIT and it is
-/// supported by the toolchain.
 bool linkerSupportsLTO(const ArgList &Args) {
   llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
   return Triple.isNVPTX() || Triple.isAMDGPU() ||
-         Args.getLastArgValue(OPT_linker_path_EQ).ends_with("ld.lld");
+         Args.getLastArgValue(OPT_linker_path_EQ).ends_with("lld");
 }
 
 /// Returns the hashed value for a constant string.
@@ -370,6 +365,8 @@ Error runLinker(ArrayRef<StringRef> Files, const ArgList &Args) {
   // Render the linker arguments and add the newly created image. We add it
   // after the output file to ensure it is linked with the correct libraries.
   StringRef LinkerPath = Args.getLastArgValue(OPT_linker_path_EQ);
+  if (LinkerPath.empty())
+    return createStringError("linker path missing, must pass 'linker-path'");
   ArgStringList NewLinkerArgs;
   for (const opt::Arg *Arg : Args) {
     // Do not forward arguments only intended for the linker wrapper.
@@ -526,13 +523,11 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args) {
   };
 
   // Forward all of the `--offload-opt` and similar options to the device.
-  if (linkerSupportsLTO(Args)) {
-    CmdArgs.push_back("-flto");
-    for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm))
-      CmdArgs.append(
-          {"-Xlinker",
-           Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))});
-  }
+  CmdArgs.push_back("-flto");
+  for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm))
+    CmdArgs.append(
+        {"-Xlinker",
+         Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))});
 
   if (!Triple.isNVPTX())
     CmdArgs.push_back("-Wl,--no-undefined");
@@ -644,357 +639,6 @@ Expected<StringRef> linkDevice(ArrayRef<StringRef> InputFiles,
   }
 }
 
-void diagnosticHandler(const DiagnosticInfo &DI) {
-  std::string ErrStorage;
-  raw_string_ostream OS(ErrStorage);
-  DiagnosticPrinterRawOStream DP(OS);
-  DI.print(DP);
-
-  switch (DI.getSeverity()) {
-  case DS_Error:
-    WithColor::error(errs(), LinkerExecutable) << ErrStorage << "\n";
-    LTOError = true;
-    break;
-  case DS_Warning:
-    WithColor::warning(errs(), LinkerExecutable) << ErrStorage << "\n";
-    break;
-  case DS_Note:
-    WithColor::note(errs(), LinkerExecutable) << ErrStorage << "\n";
-    break;
-  case DS_Remark:
-    WithColor::remark(errs()) << ErrStorage << "\n";
-    break;
-  }
-}
-
-// Get the list of target features from the input file and unify them such that
-// if there are multiple +xxx or -xxx features we only keep the last one.
-std::vector<std::string> getTargetFeatures(ArrayRef<OffloadFile> InputFiles) {
-  SmallVector<StringRef> Features;
-  for (const OffloadFile &File : InputFiles) {
-    for (auto Arg : llvm::split(File.getBinary()->getString("feature"), ","))
-      Features.emplace_back(Arg);
-  }
-
-  // Only add a feature if it hasn't been seen before starting from the end.
-  std::vector<std::string> UnifiedFeatures;
-  DenseSet<StringRef> UsedFeatures;
-  for (StringRef Feature : llvm::reverse(Features)) {
-    if (UsedFeatures.insert(Feature.drop_front()).second)
-      UnifiedFeatures.push_back(Feature.str());
-  }
-
-  return UnifiedFeatures;
-}
-
-template <typename ModuleHook = function_ref<bool(size_t, const Module &)>>
-std::unique_ptr<lto::LTO> createLTO(
-    const ArgList &Args, const std::vector<std::string> &Features,
-    ModuleHook Hook = [](size_t, const Module &) { return true; }) {
-  const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
-  // We need to remove AMD's target-id from the processor if present.
-  StringRef TargetID = Args.getLastArgValue(OPT_arch_EQ);
-  StringRef Arch = clang::getProcessorFromTargetID(Triple, TargetID);
-  lto::Config Conf;
-  lto::ThinBackend Backend;
-  // TODO: Handle index-only thin-LTO
-  Backend =
-      lto::createInProcessThinBackend(llvm::heavyweight_hardware_concurrency());
-
-  Conf.CPU = Arch.str();
-  Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple);
-
-  Conf.RemarksFilename = RemarksFilename;
-  Conf.RemarksPasses = RemarksPasses;
-  Conf.RemarksWithHotness = RemarksWithHotness;
-  Conf.RemarksHotnessThreshold = RemarksHotnessThreshold;
-  Conf.RemarksFormat = RemarksFormat;
-
-  StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2");
-  Conf.MAttrs = Features;
-  std::optional<CodeGenOptLevel> CGOptLevelOrNone =
-      CodeGenOpt::parseLevel(OptLevel[1]);
-  assert(CGOptLevelOrNone && "Invalid optimization level");
-  Conf.CGOptLevel = *CGOptLevelOrNone;
-  Conf.OptLevel = OptLevel[1] - '0';
-  Conf.DefaultTriple = Triple.getTriple();
-
-  // TODO: Should we complain about combining --opt-level and -passes, as opt
-  // does?  That might be too limiting in clang-linker-wrapper, so for now we
-  // just warn in the help entry for -passes that the default<O?> corresponding
-  // to --opt-level=O? should be included there.  The problem is that
-  // --opt-level produces effects in clang-linker-wrapper beyond what -passes
-  // appears to be able to achieve, so rejecting the combination of --opt-level
-  // and -passes would apparently make it impossible to combine those effects
-  // with a custom pass pipeline.
-  Conf.OptPipeline = PassPipeline;
-  Conf.PassPlugins = PassPlugins;
-
-  LTOError = false;
-  Conf.DiagHandler = diagnosticHandler;
-
-  Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
-  Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
-
-  if (SaveTemps) {
-    std::string TempName = (sys::path::filename(ExecutableName) + "." +
-                            Triple.getTriple() + "." + TargetID)
-                               .str();
-    Conf.PostInternalizeModuleHook = [=](size_t Task, const Module &M) {
-      std::string File =
-          !Task ? TempName + ".postlink.bc"
-                : TempName + "." + std::to_string(Task) + ".postlink.bc";
-      error_code EC;
-      raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None);
-      if (EC)
-        reportError(errorCodeToError(EC));
-      WriteBitcodeToFile(M, LinkedBitcode);
-      return true;
-    };
-    Conf.PreCodeGenModuleHook = [=](size_t Task, const Module &M) {
-      std::string File =
-          !Task ? TempName + ".postopt.bc"
-                : TempName + "." + std::to_string(Task) + ".postopt.bc";
-      error_code EC;
-      raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None);
-      if (EC)
-        reportError(errorCodeToError(EC));
-      WriteBitcodeToFile(M, LinkedBitcode);
-      return true;
-    };
-  }
-  Conf.PostOptModuleHook = Hook;
-  Conf.CGFileType = (Triple.isNVPTX() || SaveTemps)
-                        ? CodeGenFileType::AssemblyFile
-                        : CodeGenFileType::ObjectFile;
-
-  // TODO: Handle remark files
-  Conf.HasWholeProgramVisibility = Args.hasArg(OPT_whole_program);
-
-  return std::make_unique<lto::LTO>(std::move(Conf), Backend);
-}
-
-// Returns true if \p S is valid as a C language identifier and will be given
-// `__start_` and `__stop_` symbols.
-bool isValidCIdentifier(StringRef S) {
-  return !S.empty() && (isAlpha(S[0]) || S[0] == '_') &&
-         llvm::all_of(llvm::drop_begin(S),
-                      [](char C) { return C == '_' || isAlnum(C); });
-}
-
-Error linkBitcodeFiles(SmallVectorImpl<OffloadFile> &InputFiles,
-                       SmallVectorImpl<StringRef> &OutputFiles,
-                       const ArgList &Args) {
-  llvm::TimeTraceScope TimeScope("Link bitcode files");
-  const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
-  StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
-
-  SmallVector<OffloadFile, 4> BitcodeInputFiles;
-  DenseSet<StringRef> StrongResolutions;
-  DenseSet<StringRef> UsedInRegularObj;
-  DenseSet<StringRef> UsedInSharedLib;
-  BumpPtrAllocator Alloc;
-  StringSaver Saver(Alloc);
-
-  // Search for bitcode files in the input and create an LTO input file. If
-  // it is not a bitcode file, scan its symbol table for symbols we need to
-  // save.
-  for (OffloadFile &File : InputFiles) {
-    MemoryBufferRef Buffer = MemoryBufferRef(File.getBinary()->getImage(), "");
-
-    file_magic Type = identify_magic(Buffer.getBuffer());
-    switch (Type) {
-    case file_magic::bitcode: {
-      Expected<IRSymtabFile> IRSymtabOrErr = readIRSymtab(Buffer);
-      if (!IRSymtabOrErr)
-        return IRSymtabOrErr.takeError();
-
-      // Check for any strong resolutions we need to preserve.
-      for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) {
-        for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) {
-          if (!Sym.isFormatSpecific() && Sym.isGlobal() && !Sym.isWeak() &&
-              !Sym.isUndefined())
-            StrongResolutions.insert(Saver.save(Sym.Name));
-        }
-      }
-      BitcodeInputFiles.emplace_back(std::move(File));
-      continue;
-    }
-    case file_magic::elf_relocatable:
-    case file_magic::elf_shared_object: {
-      Expected<std::unique_ptr<ObjectFile>> ObjFile =
-          ObjectFile::createObjectFile(Buffer);
-      if (!ObjFile)
-        continue;
-
-      for (SymbolRef Sym : (*ObjFile)->symbols()) {
-        Expected<StringRef> Name = Sym.getName();
-        if (!Name)
-          return Name.takeError();
-
-        // Record if we've seen these symbols in any object or shared
-        // libraries.
-        if ((*ObjFile)->isRelocatableObject())
-          UsedInRegularObj.insert(Saver.save(*Name));
-        else
-          UsedInSharedLib.insert(Saver.save(*Name));
-      }
-      continue;
-    }
-    default:
-      continue;
-    }
-  }
-
-  if (BitcodeInputFiles.empty())
-    return Error::success();
-
-  // Remove all the bitcode files that we moved from the original input.
-  llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); });
-
-  // LTO Module hook to output bitcode without running the backend.
-  SmallVector<StringRef> BitcodeOutput;
-  auto OutputBitcode = [&](size_t, const Module &M) {
-    auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) +
-                                              "-jit-" + Triple.getTriple(),
-                                          "bc");
-    if (!TempFileOrErr)
-      reportError(TempFileOrErr.takeError());
-
-    std::error_code EC;
-    raw_fd_ostream LinkedBitcode(*TempFileOrErr, EC, sys::fs::OF_None);
-    if (EC)
-      reportError(errorCodeToError(EC));
-    WriteBitcodeToFile(M, LinkedBitcode);
-    BitcodeOutput.push_back(*TempFileOrErr);
-    return false;
-  };
-
-  // We assume visibility of the whole program if every input file was
-  // bitcode.
-  auto Features = getTargetFeatures(BitcodeInputFiles);
-  auto LTOBackend = Args.hasArg(OPT_embed_bitcode) ||
-                            Args.hasArg(OPT_builtin_bitcode_EQ) ||
-                            Args.hasArg(OPT_clang_backend)
-                        ? createLTO(Args, Features, OutputBitcode)
-                        : createLTO(Args, Features);
-
-  // We need to resolve the symbols so the LTO backend knows which symbols
-  // need to be kept or can be internalized. This is a simplified symbol
-  // resolution scheme to approximate the full resolution a linker would do.
-  uint64_t Idx = 0;
-  DenseSet<StringRef> PrevailingSymbols;
-  for (auto &BitcodeInput : BitcodeInputFiles) {
-    // Get a semi-unique buffer identifier for Thin-LTO.
-    StringRef Identifier = Saver.save(
-        std::to_string(Idx++) + "." +
-        BitcodeInput.getBinary()->getMemoryBufferRef().getBufferIdentifier());
-    MemoryBufferRef Buffer =
-        MemoryBufferRef(BitcodeInput.getBinary()->getImage(), Identifier);
-    Expected<std::unique_ptr<lto::InputFile>> BitcodeFileOrErr =
-        llvm::lto::InputFile::create(Buffer);
-    if (!BitcodeFileOrErr)
-      return BitcodeFileOrErr.takeError();
-
-    // Save the input file and the buffer associated with its memory.
-    const auto Symbols = (*BitcodeFileOrErr)->symbols();
-    SmallVector<lto::SymbolResolution, 16> Resolutions(Symbols.size());
-    size_t Idx = 0;
-    for (auto &Sym : Symbols) {
-      lto::SymbolResolution &Res = Resolutions[Idx++];
-
-      // We will use this as the prevailing symbol definition in LTO unless
-      // it is undefined or another definition has already been used.
-      Res.Prevailing =
-          !Sym.isUndefined() &&
-          !(Sym.isWeak() && StrongResolutions.contains(Sym.getName())) &&
-          PrevailingSymbols.insert(Saver.save(Sym.getName())).second;
-
-      // We need LTO to preseve the following global symbols:
-      // 1) Symbols used in regular objects.
-      // 2) Sections that will be given a __start/__stop symbol.
-      // 3) Prevailing symbols that are needed visible to external
-      // libraries.
-      Res.VisibleToRegularObj =
-          UsedInRegularObj.contains(Sym.getName()) ||
-          isValidCIdentifier(Sym.getSectionName()) ||
-          (Res.Prevailing &&
-           (Sym.getVisibility() != GlobalValue::HiddenVisibility &&
-            !Sym.canBeOmittedFromSymbolTable()));
-
-      // Identify symbols that must be exported dynamically and can be
-      // referenced by other files.
-      Res.ExportDynamic =
-          Sym.getVisibility() != GlobalValue::HiddenVisibility &&
-          (UsedInSharedLib.contains(Sym.getName()) ||
-           !Sym.canBeOmittedFromSymbolTable());
-
-      // The final definition will reside in this linkage unit if the symbol
-      // is defined and local to the module. This only checks for bitcode
-      // files, full assertion will require complete symbol resolution.
-      Res.FinalDefinitionInLinkageUnit =
-          Sym.getVisibility() != GlobalValue::DefaultVisibility &&
-          (!Sym.isUndefined() && !Sym.isCommon());
-
-      // We do not support linker redefined symbols (e.g. --wrap) for device
-      // image linking, so the symbols will not be changed after LTO.
-      Res.LinkerRedefined = false;
-    }
-
-    // Add the bitcode file with its resolved symbols to the LTO job.
-    if (Error Err = LTOBackend->add(std::move(*BitcodeFileOrErr), Resolutions))
-      return Err;
-  }
-
-  // Run the LTO job to compile the bitcode.
-  size_t MaxTasks = LTOBackend->getMaxTasks();
-  SmallVector<StringRef> Files(MaxTasks);
-  auto AddStream =
-      [&](size_t Task,
-          const Twine &ModuleName) -> std::unique_ptr<CachedFileStream> {
-    int FD = -1;
-    auto &TempFile = Files[Task];
-    StringRef Extension = (Triple.isNVPTX() || SaveTemps) ? "s" : "o";
-    std::string TaskStr = Task ? "." + std::to_string(Task) : "";
-    auto TempFileOrErr =
-        createOutputFile(sys::path::filename(ExecutableName) + "." +
-                             Triple.getTriple() + "." + Arch + TaskStr,
-                         Extension);
-    if (!TempFileOrErr)
-      reportError(TempFileOrErr.takeError());
-    TempFile = *TempFileOrErr;
-    if (std::error_code EC = sys::fs::openFileForWrite(TempFile, FD))
-      reportError(errorCodeToError(EC));
-    return std::make_unique<CachedFileStream>(
-        std::make_unique<llvm::raw_fd_ostream>(FD, true));
-  };
-
-  if (Error Err = LTOBackend->run(AddStream))
-    return Err;
-
-  if (LTOError)
-    return createStringError("Errors encountered inside the LTO pipeline.");
-
-  // If we are embedding bitcode we only need the intermediate output.
-  bool SingleOutput = Files.size() == 1;
-  if (Args.hasArg(OPT_embed_bitcode)) {
-    if (BitcodeOutput.size() != 1 || !SingleOutput)
-      return createStringError("Cannot embed bitcode with multiple files.");
-    OutputFiles.push_back(Args.MakeArgString(BitcodeOutput.front()));
-    return Error::success();
-  }
-
-  // Append the new inputs to the device linker input. If the user requested
-  // an internalizing link we need to pass the bitcode to clang.
-  for (StringRef File :
-       Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ)
-           ? BitcodeOutput
-           : Files)
-    OutputFiles.push_back(File);
-
-  return Error::success();
-}
-
 Expected<StringRef> writeOffloadFile(const OffloadFile &File) {
   const OffloadBinary &Binary = *File.getBinary();
 
@@ -1325,15 +969,8 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
       if (File.getBinary()->getOffloadKind() != OFK_None)
         ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind());
 
-    // First link and remove all the input files containing bitcode if
-    // the target linker does not support it natively.
+    // Write any remaining device inputs to an output file.
     SmallVector<StringRef> InputFiles;
-    if (!linkerSupportsLTO(LinkerArgs))
-      if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs))
-        return Err;
-
-    // Write any remaining device inputs to an output file for the
-    // linker.
     for (const OffloadFile &File : Input) {
       auto FileNameOrErr = writeOffloadFile(File);
       if (!FileNameOrErr)
@@ -1342,10 +979,7 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
     }
 
     // Link the remaining device files using the device linker.
-    auto OutputOrErr =
-        !Args.hasArg(OPT_embed_bitcode) || linkerSupportsLTO(LinkerArgs)
-            ? linkDevice(InputFiles, LinkerArgs)
-            : InputFiles.front();
+    auto OutputOrErr = linkDevice(InputFiles, LinkerArgs);
     if (!OutputOrErr)
       return OutputOrErr.takeError();
 
diff --git a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
index b9767a7a03d0b59..bc191afdca739df 100644
--- a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
+++ b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp
@@ -250,6 +250,7 @@ struct Symbol {
   };
 
   Symbol() : File(), Flags(None), UsedInRegularObj(false) {}
+  Symbol(Symbol::Flags Flags) : File(), Flags(Flags), UsedInRegularObj(true) {}
 
   Symbol(MemoryBufferRef File, const irsymtab::Reader::SymbolRef Sym)
       : File(File), Flags(0), UsedInRegularObj(false) {
@@ -535,6 +536,8 @@ Expected<SmallVector<StringRef>> getInput(const ArgList &Args) {
 
   bool Extracted = true;
   StringMap<Symbol> SymTab;
+  for (auto &Sym : Args.getAllArgValues(OPT_u))
+    SymTab[Sym] = Symbol(Symbol::Undefined);
   SmallVector<std::unique_ptr<MemoryBuffer>> LinkerInput;
   while (Extracted) {
     Extracted = false;
diff --git a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
index a80c5937b429923..6de1a25c14f8be0 100644
--- a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
+++ b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td
@@ -43,11 +43,11 @@ def plugin : JoinedOrSeparate<["--", "-"], "plugin">,
   Flags<[HelpHidden, WrapperOnlyOption]>;
 
 def arch : Separate<["--", "-"], "arch">,
-  HelpText<"Specify the 'sm_' name of the target architecture.">;
+  HelpText<"Specify the 'sm_' name of the target architecture">;
 def : Joined<["--", "-"], "plugin-opt=mcpu=">,
   Flags<[HelpHidden, WrapperOnlyOption]>, Alias<arch>;
 
-def g : Flag<["-"], "g">, HelpText<"Specify that this was a debug compile.">;
+def g : Flag<["-"], "g">, HelpText<"Specify that this was a debug compile">;
 def debug : Flag<["--"], "debug">, Alias<g>;
 
 def lto_emit_llvm : Flag<["--"], "lto-emit-llvm">, Flags<[WrapperOnlyOption]>,
@@ -55,6 +55,9 @@ def lto_emit_llvm : Flag<["--"], "lto-emit-llvm">, Flags<[WrapperOnlyOption]>,
 def lto_emit_asm : Flag<["--"], "lto-emit-asm">, Flags<[WrapperOnlyOption]>,
   HelpText<"Emit assembly code">;
 
+def u : JoinedOrSeparate<["-"], "u">, HelpText<"Force undefined symbol during linking">;
+def undefined : JoinedOrSeparate<["--"], "undefined">, Alias<u>;
+
 def O : Joined<["--", "-"], "plugin-opt=O">,
   Flags<[WrapperOnlyOption]>, MetaVarName<"<O0, O1, O2, or O3>">,
   HelpText<"Optimization level for LTO">;
diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
index 7d36cee7a22b391..f474b1346b1be10 100644
--- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp
+++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp
@@ -1080,10 +1080,15 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) {
                  << NumExistsCalls << " exists() calls\n"
                  << NumIsLocalCalls << " isLocal() calls\n";
 
-  if (PrintTiming)
-    llvm::errs() << llvm::format(
-        "clang-scan-deps timing: %0.2fs wall, %0.2fs process\n",
-        T.getTotalTime().getWallTime(), T.getTotalTime().getProcessTime());
+  if (PrintTiming) {
+    // Emit a tab-separated header row and one row of totals on stderr.
+    llvm::errs() << "wall time [s]\tprocess time [s]\tinstruction count\n";
+    const llvm::TimeRecord &R = T.getTotalTime();
+    llvm::errs() << llvm::format("%0.4f", R.getWallTime()) << "\t"
+                 << llvm::format("%0.4f", R.getProcessTime()) << "\t"
+                 // Stream the count directly; no printf length modifier.
+                 << R.getInstructionsExecuted() << "\n";
+  }
 
   if (RoundTripArgs)
     if (FD && FD->roundTripCommands(llvm::errs()))
diff --git a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp
index 22fe347c425593f..de16f6be8eedbc9 100644
--- a/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/UncheckedOptionalAccessModelTest.cpp
@@ -1282,28 +1282,35 @@ static raw_ostream &operator<<(raw_ostream &OS,
 class UncheckedOptionalAccessTest
     : public ::testing::TestWithParam<OptionalTypeIdentifier> {
 protected:
-  void ExpectDiagnosticsFor(std::string SourceCode) {
-    ExpectDiagnosticsFor(SourceCode, ast_matchers::hasName("target"));
+  void ExpectDiagnosticsFor(std::string SourceCode,
+                            bool IgnoreSmartPointerDereference = true) {
+    ExpectDiagnosticsFor(SourceCode, ast_matchers::hasName("target"),
+                         IgnoreSmartPointerDereference);
   }
 
-  void ExpectDiagnosticsForLambda(std::string SourceCode) {
+  void ExpectDiagnosticsForLambda(std::string SourceCode,
+                                  bool IgnoreSmartPointerDereference = true) {
     ExpectDiagnosticsFor(
-        SourceCode, ast_matchers::hasDeclContext(
-                        ast_matchers::cxxRecordDecl(ast_matchers::isLambda())));
+        SourceCode,
+        ast_matchers::hasDeclContext(
+            ast_matchers::cxxRecordDecl(ast_matchers::isLambda())),
+        IgnoreSmartPointerDereference);
   }
 
   template <typename FuncDeclMatcher>
-  void ExpectDiagnosticsFor(std::string SourceCode,
-                            FuncDeclMatcher FuncMatcher) {
+  void ExpectDiagnosticsFor(std::string SourceCode, FuncDeclMatcher FuncMatcher,
+                            bool IgnoreSmartPointerDereference = true) {
     // Run in C++17 and C++20 mode to cover differences in the AST between modes
     // (e.g. C++20 can contain `CXXRewrittenBinaryOperator`).
     for (const char *CxxMode : {"-std=c++17", "-std=c++20"})
-      ExpectDiagnosticsFor(SourceCode, FuncMatcher, CxxMode);
+      ExpectDiagnosticsFor(SourceCode, FuncMatcher, CxxMode,
+                           IgnoreSmartPointerDereference);
   }
 
   template <typename FuncDeclMatcher>
   void ExpectDiagnosticsFor(std::string SourceCode, FuncDeclMatcher FuncMatcher,
-                            const char *CxxMode) {
+                            const char *CxxMode,
+                            bool IgnoreSmartPointerDereference) {
     ReplaceAllOccurrences(SourceCode, "$ns", GetParam().NamespaceName);
     ReplaceAllOccurrences(SourceCode, "$optional", GetParam().TypeName);
 
@@ -1328,8 +1335,7 @@ class UncheckedOptionalAccessTest
       template <typename T>
       T Make();
     )");
-    UncheckedOptionalAccessModelOptions Options{
-        /*IgnoreSmartPointerDereference=*/true};
+    UncheckedOptionalAccessModelOptions Options{IgnoreSmartPointerDereference};
     std::vector<SourceLocation> Diagnostics;
     llvm::Error Error = checkDataflow<UncheckedOptionalAccessModel>(
         AnalysisInputs<UncheckedOptionalAccessModel>(
@@ -2167,7 +2173,7 @@ TEST_P(UncheckedOptionalAccessTest, OptionalReturnedFromFuntionCall) {
   )");
 }
 
-TEST_P(UncheckedOptionalAccessTest, OptionalFieldModified) {
+TEST_P(UncheckedOptionalAccessTest, NonConstMethodMayClearOptionalField) {
   ExpectDiagnosticsFor(
       R"(
     #include "unchecked_optional_access_test.h"
@@ -2187,6 +2193,27 @@ TEST_P(UncheckedOptionalAccessTest, OptionalFieldModified) {
   )");
 }
 
+TEST_P(UncheckedOptionalAccessTest,
+       NonConstMethodMayNotClearConstOptionalField) {
+  ExpectDiagnosticsFor(
+      R"(
+    #include "unchecked_optional_access_test.h"
+
+    struct Foo {
+      const $ns::$optional<std::string> opt;
+      void clear();
+    };
+
+    void target(Foo& foo) {
+      if (foo.opt) {
+        foo.opt.value();
+        foo.clear();
+        foo.opt.value();
+      }
+    }
+  )");
+}
+
 TEST_P(UncheckedOptionalAccessTest, StdSwap) {
   ExpectDiagnosticsFor(
       R"(
@@ -3700,6 +3727,50 @@ TEST_P(UncheckedOptionalAccessTest, ConstByValueAccessorWithModInBetween) {
   )cc");
 }
 
+TEST_P(UncheckedOptionalAccessTest, ConstPointerAccessor) {
+  ExpectDiagnosticsFor(R"cc(
+     #include "unchecked_optional_access_test.h"
+
+    struct A {
+      $ns::$optional<int> x;
+    };
+
+    struct MyUniquePtr {
+      A* operator->() const;
+    };
+
+    void target(MyUniquePtr p) {
+      if (p->x) {
+        *p->x;
+      }
+    }
+  )cc",
+                       /*IgnoreSmartPointerDereference=*/false);
+}
+
+TEST_P(UncheckedOptionalAccessTest, ConstPointerAccessorWithModInBetween) {
+  ExpectDiagnosticsFor(R"cc(
+    #include "unchecked_optional_access_test.h"
+
+    struct A {
+      $ns::$optional<int> x;
+    };
+
+    struct MyUniquePtr {
+      A* operator->() const;
+      void reset(A*);
+    };
+
+    void target(MyUniquePtr p) {
+      if (p->x) {
+        p.reset(nullptr);
+        *p->x;  // [[unsafe]]
+      }
+    }
+  )cc",
+                       /*IgnoreSmartPointerDereference=*/false);
+}
+
 TEST_P(UncheckedOptionalAccessTest, ConstBoolAccessor) {
   ExpectDiagnosticsFor(R"cc(
     #include "unchecked_optional_access_test.h"
diff --git a/clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp b/clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp
index b13e7123ee524d1..0f6e49bf42f4acc 100644
--- a/clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp
+++ b/clang/unittests/StaticAnalyzer/MemRegionDescriptiveNameTest.cpp
@@ -12,7 +12,6 @@
 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
 #include "clang/StaticAnalyzer/Core/PathSensitive/MemRegion.h"
 #include "gtest/gtest.h"
-#include <fstream>
 
 using namespace clang;
 using namespace ento;
@@ -143,4 +142,32 @@ void top() {
   EXPECT_EQ(Output, "DescriptiveNameChecker: array[x]\n");
 }
 
+TEST(MemRegionDescriptiveNameTest, FieldRegWithSuperElementReg) {
+  StringRef Code = R"cpp(
+void reportDescriptiveName(int *p);
+struct val_struct { int val; };
+extern struct val_struct val_struct_array[3];
+void top() {
+  reportDescriptiveName(&val_struct_array[0].val);
+})cpp";
+
+  std::string Output;
+  ASSERT_TRUE(runChecker(Code, Output));
+  EXPECT_EQ(Output, "DescriptiveNameChecker: val_struct_array[0].val\n");
+}
+
+TEST(MemRegionDescriptiveNameTest, FieldRegWithSuperMultidimElementReg) {
+  StringRef Code = R"cpp(
+void reportDescriptiveName(int *p);
+struct val_struct { int val; };
+extern struct val_struct val_struct_array[3][4];
+void top() {
+  reportDescriptiveName(&val_struct_array[1][2].val);
+})cpp";
+
+  std::string Output;
+  ASSERT_TRUE(runChecker(Code, Output));
+  EXPECT_EQ(Output, "DescriptiveNameChecker: val_struct_array[1][2].val\n");
+}
+
 } // namespace
diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp
index 4890d249c6d8f78..3031d81b3df7312 100644
--- a/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -49,40 +49,38 @@ using namespace llvm;
 namespace {
 
 class FlattenedSpelling {
-  std::string V, N, NS;
+  StringRef V, N, NS;
   bool K = false;
   const Record &OriginalSpelling;
 
 public:
-  FlattenedSpelling(const std::string &Variety, const std::string &Name,
-                    const std::string &Namespace, bool KnownToGCC,
-                    const Record &OriginalSpelling)
+  FlattenedSpelling(StringRef Variety, StringRef Name, StringRef Namespace,
+                    bool KnownToGCC, const Record &OriginalSpelling)
       : V(Variety), N(Name), NS(Namespace), K(KnownToGCC),
         OriginalSpelling(OriginalSpelling) {}
   explicit FlattenedSpelling(const Record &Spelling)
-      : V(std::string(Spelling.getValueAsString("Variety"))),
-        N(std::string(Spelling.getValueAsString("Name"))),
-        OriginalSpelling(Spelling) {
+      : V(Spelling.getValueAsString("Variety")),
+        N(Spelling.getValueAsString("Name")), OriginalSpelling(Spelling) {
     assert(V != "GCC" && V != "Clang" &&
            "Given a GCC spelling, which means this hasn't been flattened!");
     if (V == "CXX11" || V == "C23" || V == "Pragma")
-      NS = std::string(Spelling.getValueAsString("Namespace"));
+      NS = Spelling.getValueAsString("Namespace");
   }
 
-  const std::string &variety() const { return V; }
-  const std::string &name() const { return N; }
-  const std::string &nameSpace() const { return NS; }
+  StringRef variety() const { return V; }
+  StringRef name() const { return N; }
+  StringRef nameSpace() const { return NS; }
   bool knownToGCC() const { return K; }
   const Record &getSpellingRecord() const { return OriginalSpelling; }
 };
 
 struct FlattenedSpellingInfo {
-  FlattenedSpellingInfo(std::string Syntax, std::string Scope,
-                        std::string TargetTest, uint32_t ArgMask)
+  FlattenedSpellingInfo(StringRef Syntax, StringRef Scope,
+                        const std::string &TargetTest, uint32_t ArgMask)
       : Syntax(Syntax), Scope(Scope), TargetTest(TargetTest), ArgMask(ArgMask) {
   }
-  std::string Syntax;
-  std::string Scope;
+  StringRef Syntax;
+  StringRef Scope;
   std::string TargetTest;
   uint32_t ArgMask;
 };
@@ -105,17 +103,18 @@ GetFlattenedSpellings(const Record &Attr) {
     StringRef Variety = Spelling->getValueAsString("Variety");
     StringRef Name = Spelling->getValueAsString("Name");
     if (Variety == "GCC") {
-      Ret.emplace_back("GNU", std::string(Name), "", true, *Spelling);
-      Ret.emplace_back("CXX11", std::string(Name), "gnu", true, *Spelling);
+      Ret.emplace_back("GNU", Name, "", true, *Spelling);
+      Ret.emplace_back("CXX11", Name, "gnu", true, *Spelling);
       if (Spelling->getValueAsBit("AllowInC"))
-        Ret.emplace_back("C23", std::string(Name), "gnu", true, *Spelling);
+        Ret.emplace_back("C23", Name, "gnu", true, *Spelling);
     } else if (Variety == "Clang") {
-      Ret.emplace_back("GNU", std::string(Name), "", false, *Spelling);
-      Ret.emplace_back("CXX11", std::string(Name), "clang", false, *Spelling);
+      Ret.emplace_back("GNU", Name, "", false, *Spelling);
+      Ret.emplace_back("CXX11", Name, "clang", false, *Spelling);
       if (Spelling->getValueAsBit("AllowInC"))
-        Ret.emplace_back("C23", std::string(Name), "clang", false, *Spelling);
-    } else
+        Ret.emplace_back("C23", Name, "clang", false, *Spelling);
+    } else {
       Ret.push_back(FlattenedSpelling(*Spelling));
+    }
   }
 
   return Ret;
@@ -123,9 +122,7 @@ GetFlattenedSpellings(const Record &Attr) {
 
 static std::string ReadPCHRecord(StringRef type) {
   return StringSwitch<std::string>(type)
-      .EndsWith("Decl *", "Record.readDeclAs<" +
-                              std::string(type.data(), 0, type.size() - 1) +
-                              ">()")
+      .EndsWith("Decl *", "Record.readDeclAs<" + type.drop_back().str() + ">()")
       .Case("TypeSourceInfo *", "Record.readTypeSourceInfo()")
       .Case("Expr *", "Record.readExpr()")
       .Case("IdentifierInfo *", "Record.readIdentifier()")
@@ -146,18 +143,16 @@ static StringRef getStorageType(StringRef type) {
 static std::string WritePCHRecord(StringRef type, StringRef name) {
   return "Record." +
          StringSwitch<std::string>(type)
-             .EndsWith("Decl *", "AddDeclRef(" + std::string(name) + ");\n")
+             .EndsWith("Decl *", "AddDeclRef(" + name.str() + ");\n")
              .Case("TypeSourceInfo *",
-                   "AddTypeSourceInfo(" + std::string(name) + ");\n")
-             .Case("Expr *", "AddStmt(" + std::string(name) + ");\n")
+                   "AddTypeSourceInfo(" + name.str() + ");\n")
+             .Case("Expr *", "AddStmt(" + name.str() + ");\n")
              .Case("IdentifierInfo *",
-                   "AddIdentifierRef(" + std::string(name) + ");\n")
-             .Case("StringRef", "AddString(" + std::string(name) + ");\n")
-             .Case("ParamIdx",
-                   "push_back(" + std::string(name) + ".serialize());\n")
-             .Case("OMPTraitInfo *",
-                   "writeOMPTraitInfo(" + std::string(name) + ");\n")
-             .Default("push_back(" + std::string(name) + ");\n");
+                   "AddIdentifierRef(" + name.str() + ");\n")
+             .Case("StringRef", "AddString(" + name.str() + ");\n")
+             .Case("ParamIdx", "push_back(" + name.str() + ".serialize());\n")
+             .Case("OMPTraitInfo *", "writeOMPTraitInfo(" + name.str() + ");\n")
+             .Default("push_back(" + name.str() + ");\n");
 }
 
 // Normalize attribute name by removing leading and trailing
@@ -198,7 +193,7 @@ static ParsedAttrMap getParsedAttrList(const RecordKeeper &Records,
       std::string AN;
       if (Attr->isSubClassOf("TargetSpecificAttr") &&
           !Attr->isValueUnset("ParseKind")) {
-        AN = std::string(Attr->getValueAsString("ParseKind"));
+        AN = Attr->getValueAsString("ParseKind").str();
 
         // If this attribute has already been handled, it does not need to be
         // handled again.
@@ -226,7 +221,7 @@ namespace {
 
   public:
     Argument(StringRef Arg, StringRef Attr)
-        : lowerName(std::string(Arg)), upperName(lowerName), attrName(Attr),
+        : lowerName(Arg.str()), upperName(lowerName), attrName(Attr),
           isOpt(false), Fake(false) {
       if (!lowerName.empty()) {
         lowerName[0] = std::tolower(lowerName[0]);
@@ -332,8 +327,7 @@ namespace {
 
     void writePCHWrite(raw_ostream &OS) const override {
       OS << "    "
-         << WritePCHRecord(type,
-                           "SA->get" + std::string(getUpperName()) + "()");
+         << WritePCHRecord(type, "SA->get" + getUpperName().str() + "()");
     }
 
     std::string getIsOmitted() const override {
@@ -699,12 +693,12 @@ namespace {
     VariadicArgument(const Record &Arg, StringRef Attr, std::string T)
         : Argument(Arg, Attr), Type(std::move(T)),
           ArgName(getLowerName().str() + "_"), ArgSizeName(ArgName + "Size"),
-          RangeName(std::string(getLowerName())) {}
+          RangeName(getLowerName().str()) {}
 
     VariadicArgument(StringRef Arg, StringRef Attr, std::string T)
         : Argument(Arg, Attr), Type(std::move(T)),
           ArgName(getLowerName().str() + "_"), ArgSizeName(ArgName + "Size"),
-          RangeName(std::string(getLowerName())) {}
+          RangeName(getLowerName().str()) {}
 
     const std::string &getType() const { return Type; }
     const std::string &getArgName() const { return ArgName; }
@@ -793,8 +787,8 @@ namespace {
       // If we can't store the values in the current type (if it's something
       // like StringRef), store them in a different type and convert the
       // container afterwards.
-      std::string StorageType = std::string(getStorageType(getType()));
-      std::string StorageName = std::string(getLowerName());
+      std::string StorageType = getStorageType(getType()).str();
+      std::string StorageName = getLowerName().str();
       if (StorageType != getType()) {
         StorageName += "Storage";
         OS << "    SmallVector<" << StorageType << ", 4> "
@@ -1082,8 +1076,7 @@ namespace {
 
   public:
     VariadicEnumArgument(const Record &Arg, StringRef Attr)
-        : VariadicArgument(Arg, Attr,
-                           std::string(Arg.getValueAsString("Type"))),
+        : VariadicArgument(Arg, Attr, Arg.getValueAsString("Type").str()),
           values(Arg.getValueAsListOfStrings("Values")),
           enums(Arg.getValueAsListOfStrings("Enums")),
           uniques(uniqueEnumsInOrder(enums)),
@@ -1438,7 +1431,7 @@ namespace {
     void writePCHWrite(raw_ostream &OS) const override {
       OS << "    "
          << WritePCHRecord(getType(),
-                           "SA->get" + std::string(getUpperName()) + "Loc()");
+                           "SA->get" + getUpperName().str() + "Loc()");
     }
   };
 
@@ -1554,7 +1547,7 @@ static void writeAvailabilityValue(raw_ostream &OS) {
      << "  OS << \"";
 }
 
-static void writeDeprecatedAttrValue(raw_ostream &OS, std::string &Variety) {
+static void writeDeprecatedAttrValue(raw_ostream &OS, StringRef Variety) {
   OS << "\\\"\" << getMessage() << \"\\\"\";\n";
   // Only GNU deprecated has an optional fixit argument at the second position.
   if (Variety == "GNU")
@@ -1577,9 +1570,12 @@ static void writeGetSpellingFunction(const Record &R, raw_ostream &OS) {
         "    llvm_unreachable(\"Unknown attribute spelling!\");\n"
         "    return \"(No spelling)\";\n";
 
-  for (unsigned I = 0; I < Spellings.size(); ++I)
-    OS << "  case " << I << ":\n"
-          "    return \"" << Spellings[I].name() << "\";\n";
+  for (const auto &[Idx, S] : enumerate(Spellings)) {
+    // clang-format off
+    OS << "  case " << Idx << ":\n"
+          "    return \"" << S.name() << "\";\n";
+    // clang-format on
+  }
   // End of the switch statement.
   OS << "  }\n";
   // End of the getSpelling function.
@@ -1607,14 +1603,14 @@ writePrettyPrintFunction(const Record &R,
      << "    llvm_unreachable(\"Unknown attribute spelling!\");\n"
      << "    break;\n";
 
-  for (unsigned I = 0; I < Spellings.size(); ++ I) {
+  for (const auto &[Idx, S] : enumerate(Spellings)) {
     SmallString<16> Prefix;
     SmallString<8> Suffix;
     // The actual spelling of the name and namespace (if applicable)
     // of an attribute without considering prefix and suffix.
     SmallString<64> Spelling;
-    std::string Name = Spellings[I].name();
-    std::string Variety = Spellings[I].variety();
+    StringRef Name = S.name();
+    StringRef Variety = S.variety();
 
     if (Variety == "GNU") {
       Prefix = "__attribute__((";
@@ -1622,7 +1618,7 @@ writePrettyPrintFunction(const Record &R,
     } else if (Variety == "CXX11" || Variety == "C23") {
       Prefix = "[[";
       Suffix = "]]";
-      std::string Namespace = Spellings[I].nameSpace();
+      StringRef Namespace = S.nameSpace();
       if (!Namespace.empty()) {
         Spelling += Namespace;
         Spelling += "::";
@@ -1639,7 +1635,7 @@ writePrettyPrintFunction(const Record &R,
     } else if (Variety == "Pragma") {
       Prefix = "#pragma ";
       Suffix = "\n";
-      std::string Namespace = Spellings[I].nameSpace();
+      StringRef Namespace = S.nameSpace();
       if (!Namespace.empty()) {
         Spelling += Namespace;
         Spelling += " ";
@@ -1653,7 +1649,7 @@ writePrettyPrintFunction(const Record &R,
 
     Spelling += Name;
 
-    OS << "  case " << I << " : {\n"
+    OS << "  case " << Idx << " : {\n"
        << "    OS << \"" << Prefix << Spelling << "\";\n";
 
     if (Variety == "Pragma") {
@@ -1724,24 +1720,17 @@ writePrettyPrintFunction(const Record &R,
 }
 
 /// Return the index of a spelling in a spelling list.
-static unsigned
-getSpellingListIndex(const std::vector<FlattenedSpelling> &SpellingList,
-                     const FlattenedSpelling &Spelling) {
+static unsigned getSpellingListIndex(ArrayRef<FlattenedSpelling> SpellingList,
+                                     const FlattenedSpelling &Spelling) {
   assert(!SpellingList.empty() && "Spelling list is empty!");
 
-  for (unsigned Index = 0; Index < SpellingList.size(); ++Index) {
-    const FlattenedSpelling &S = SpellingList[Index];
-    if (S.variety() != Spelling.variety())
-      continue;
-    if (S.nameSpace() != Spelling.nameSpace())
-      continue;
-    if (S.name() != Spelling.name())
-      continue;
-
-    return Index;
+  for (const auto &[Index, S] : enumerate(SpellingList)) {
+    if (S.variety() == Spelling.variety() &&
+        S.nameSpace() == Spelling.nameSpace() && S.name() == Spelling.name())
+      return Index;
   }
 
-  llvm_unreachable("Unknown spelling!");
+  PrintFatalError("Unknown spelling: " + Spelling.name());
 }
 
 static void writeAttrAccessorDefinition(const Record &R, raw_ostream &OS) {
@@ -1771,11 +1760,10 @@ static void writeAttrAccessorDefinition(const Record &R, raw_ostream &OS) {
 static bool
 SpellingNamesAreCommon(const std::vector<FlattenedSpelling>& Spellings) {
   assert(!Spellings.empty() && "An empty list of spellings was provided");
-  std::string FirstName =
-      std::string(NormalizeNameForSpellingComparison(Spellings.front().name()));
+  StringRef FirstName =
+      NormalizeNameForSpellingComparison(Spellings.front().name());
   for (const auto &Spelling : drop_begin(Spellings)) {
-    std::string Name =
-        std::string(NormalizeNameForSpellingComparison(Spelling.name()));
+    StringRef Name = NormalizeNameForSpellingComparison(Spelling.name());
     if (Name != FirstName)
       return false;
   }
@@ -1802,15 +1790,15 @@ CreateSemanticSpellings(const std::vector<FlattenedSpelling> &Spellings,
          "AttributeCommonInfo");
   for (auto I = Spellings.begin(), E = Spellings.end(); I != E; ++I, ++Idx) {
     const FlattenedSpelling &S = *I;
-    const std::string &Variety = S.variety();
-    const std::string &Spelling = S.name();
-    const std::string &Namespace = S.nameSpace();
+    StringRef Variety = S.variety();
+    StringRef Spelling = S.name();
+    StringRef Namespace = S.nameSpace();
     std::string EnumName;
 
-    EnumName += (Variety + "_");
+    EnumName += Variety;
+    EnumName += "_";
     if (!Namespace.empty())
-      EnumName += (NormalizeNameForSpellingComparison(Namespace).str() +
-      "_");
+      EnumName += NormalizeNameForSpellingComparison(Namespace).str() + "_";
     EnumName += NormalizeNameForSpellingComparison(Spelling);
 
     // Even if the name is not unique, this spelling index corresponds to a
@@ -1837,7 +1825,7 @@ CreateSemanticSpellings(const std::vector<FlattenedSpelling> &Spellings,
   return Ret;
 }
 
-void WriteSemanticSpellingSwitch(const std::string &VarName,
+void WriteSemanticSpellingSwitch(StringRef VarName,
                                  const SemanticSpellingMap &Map,
                                  raw_ostream &OS) {
   OS << "  switch (" << VarName << ") {\n    default: "
@@ -1990,7 +1978,7 @@ struct AttributeSubjectMatchRule {
   }
 
   std::string getSpelling() const {
-    std::string Result = std::string(MetaSubject->getValueAsString("Name"));
+    std::string Result = MetaSubject->getValueAsString("Name").str();
     if (isSubRule()) {
       Result += '(';
       if (isNegatedSubRule())
@@ -2378,30 +2366,22 @@ void PragmaClangAttributeSupport::generateParsingHelpers(raw_ostream &OS) {
 }
 
 template <typename Fn> static void forEachSpelling(const Record &Attr, Fn &&F) {
-  std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(Attr);
-  for (const FlattenedSpelling &S : Spellings) {
+  for (const FlattenedSpelling &S : GetFlattenedSpellings(Attr)) {
     F(S);
   }
 }
 
-std::map<std::string, std::vector<const Record *>> NameToAttrsMap;
+std::map<StringRef, std::vector<const Record *>> NameToAttrsMap;
 
 /// Build a map from the attribute name to the Attrs that use that name. If more
 /// than one Attr use a name, the arguments could be different so a more complex
 /// check is needed in the generated switch.
 void generateNameToAttrsMap(const RecordKeeper &Records) {
   for (const auto *A : Records.getAllDerivedDefinitions("Attr")) {
-    std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(*A);
-    for (const auto &S : Spellings) {
-      auto It = NameToAttrsMap.find(S.name());
-      if (It != NameToAttrsMap.end()) {
-        if (none_of(It->second, [&](const Record *R) { return R == A; }))
-          It->second.emplace_back(A);
-      } else {
-        std::vector<const Record *> V;
-        V.emplace_back(A);
-        NameToAttrsMap.insert(std::make_pair(S.name(), V));
-      }
+    for (const FlattenedSpelling &S : GetFlattenedSpellings(*A)) {
+      std::vector<const Record *> &Users = NameToAttrsMap[S.name()];
+      if (!is_contained(Users, A)) // Record each attribute once per name.
+        Users.emplace_back(A);
     }
   }
 }
@@ -2410,7 +2390,7 @@ void generateNameToAttrsMap(const RecordKeeper &Records) {
 /// attribute has the same name. Store the info in a map that can be processed
 /// after all attributes are seen.
 static void generateFlattenedSpellingInfo(const Record &Attr,
-                                          std::map<std::string, FSIVecTy> &Map,
+                                          std::map<StringRef, FSIVecTy> &Map,
                                           uint32_t ArgMask = 0) {
   std::string TargetTest;
   if (Attr.isSubClassOf("TargetSpecificAttr") &&
@@ -2421,24 +2401,17 @@ static void generateFlattenedSpellingInfo(const Record &Attr,
   }
 
   forEachSpelling(Attr, [&](const FlattenedSpelling &S) {
-    auto It = Map.find(S.name());
-    if (It != Map.end()) {
-      It->second.emplace_back(S.variety(), S.nameSpace(), TargetTest, ArgMask);
-    } else {
-      FSIVecTy V;
-      V.emplace_back(S.variety(), S.nameSpace(), TargetTest, ArgMask);
-      Map.insert(std::make_pair(S.name(), V));
-    }
+    Map[S.name()].emplace_back(S.variety(), S.nameSpace(), TargetTest, ArgMask);
   });
 }
 
-static bool nameAppliesToOneAttribute(std::string Name) {
+static bool nameAppliesToOneAttribute(StringRef Name) {
   auto It = NameToAttrsMap.find(Name);
   assert(It != NameToAttrsMap.end());
   return It->second.size() == 1;
 }
 
-static bool emitIfSimpleValue(std::string Name, uint32_t ArgMask,
+static bool emitIfSimpleValue(StringRef Name, uint32_t ArgMask,
                               raw_ostream &OS) {
   if (nameAppliesToOneAttribute(Name)) {
     OS << ".Case(\"" << Name << "\", ";
@@ -2463,15 +2436,13 @@ static void emitSingleCondition(const FlattenedSpellingInfo &FSI,
   OS << ")";
 }
 
-static void emitStringSwitchCases(std::map<std::string, FSIVecTy> &Map,
+static void emitStringSwitchCases(std::map<StringRef, FSIVecTy> &Map,
                                   raw_ostream &OS) {
-  for (const auto &P : Map) {
-    if (emitIfSimpleValue(P.first, P.second[0].ArgMask, OS))
+  for (const auto &[Name, Vec] : Map) {
+    if (emitIfSimpleValue(Name, Vec[0].ArgMask, OS))
       continue;
 
     // Not simple, build expressions for each case.
-    StringRef Name = P.first;
-    const FSIVecTy &Vec = P.second;
     OS << ".Case(\"" << Name << "\", ";
     for (unsigned I = 0, E = Vec.size(); I < E; ++I) {
       emitSingleCondition(Vec[I], OS);
@@ -2498,7 +2469,7 @@ static bool isTypeArgument(const Record *Arg) {
 static void emitClangAttrTypeArgList(const RecordKeeper &Records,
                                      raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_TYPE_ARG_LIST)\n";
-  std::map<std::string, FSIVecTy> FSIMap;
+  std::map<StringRef, FSIVecTy> FSIMap;
   for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     // Determine whether the first argument is a type.
     std::vector<const Record *> Args = Attr->getValueAsListOfDefs("Args");
@@ -2518,7 +2489,7 @@ static void emitClangAttrTypeArgList(const RecordKeeper &Records,
 static void emitClangAttrArgContextList(const RecordKeeper &Records,
                                         raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_ARG_CONTEXT_LIST)\n";
-  std::map<std::string, FSIVecTy> FSIMap;
+  std::map<StringRef, FSIVecTy> FSIMap;
   ParsedAttrMap Attrs = getParsedAttrList(Records);
   for (const auto &I : Attrs) {
     const Record &Attr = *I.second;
@@ -2576,7 +2547,7 @@ static bool isVariadicStringLiteralArgument(const Record *Arg) {
 static void emitClangAttrVariadicIdentifierArgList(const RecordKeeper &Records,
                                                    raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_VARIADIC_IDENTIFIER_ARG_LIST)\n";
-  std::map<std::string, FSIVecTy> FSIMap;
+  std::map<StringRef, FSIVecTy> FSIMap;
   for (const auto *A : Records.getAllDerivedDefinitions("Attr")) {
     // Determine whether the first argument is a variadic identifier.
     std::vector<const Record *> Args = A->getValueAsListOfDefs("Args");
@@ -2609,7 +2580,7 @@ emitClangAttrUnevaluatedStringLiteralList(const RecordKeeper &Records,
     return Bits;
   };
 
-  std::map<std::string, FSIVecTy> FSIMap;
+  std::map<StringRef, FSIVecTy> FSIMap;
   for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     // Determine whether there are any string arguments.
     uint32_t ArgMask = MakeMask(Attr->getValueAsListOfDefs("Args"));
@@ -2625,7 +2596,7 @@ emitClangAttrUnevaluatedStringLiteralList(const RecordKeeper &Records,
 static void emitClangAttrIdentifierArgList(const RecordKeeper &Records,
                                            raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_IDENTIFIER_ARG_LIST)\n";
-  std::map<std::string, FSIVecTy> FSIMap;
+  std::map<StringRef, FSIVecTy> FSIMap;
   for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     // Determine whether the first argument is an identifier.
     std::vector<const Record *> Args = Attr->getValueAsListOfDefs("Args");
@@ -2641,7 +2612,7 @@ static void emitClangAttrIdentifierArgList(const RecordKeeper &Records,
 static void emitClangAttrStrictIdentifierArgList(const RecordKeeper &Records,
                                                  raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_STRICT_IDENTIFIER_ARG_LIST)\n";
-  std::map<std::string, FSIVecTy> FSIMap;
+  std::map<StringRef, FSIVecTy> FSIMap;
   for (const auto *Attr : Records.getAllDerivedDefinitions("Attr")) {
     if (!Attr->getValueAsBit("StrictEnumParameters"))
       continue;
@@ -2665,7 +2636,7 @@ static bool keywordThisIsaIdentifierInArgument(const Record *Arg) {
 static void emitClangAttrThisIsaIdentifierArgList(const RecordKeeper &Records,
                                                   raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_THIS_ISA_IDENTIFIER_ARG_LIST)\n";
-  std::map<std::string, FSIVecTy> FSIMap;
+  std::map<StringRef, FSIVecTy> FSIMap;
   for (const auto *A : Records.getAllDerivedDefinitions("Attr")) {
     // Determine whether the first argument is a variadic identifier.
     std::vector<const Record *> Args = A->getValueAsListOfDefs("Args");
@@ -2681,7 +2652,7 @@ static void emitClangAttrAcceptsExprPack(const RecordKeeper &Records,
                                          raw_ostream &OS) {
   OS << "#if defined(CLANG_ATTR_ACCEPTS_EXPR_PACK)\n";
   ParsedAttrMap Attrs = getParsedAttrList(Records);
-  std::map<std::string, FSIVecTy> FSIMap;
+  std::map<StringRef, FSIVecTy> FSIMap;
   for (const auto &I : Attrs) {
     const Record &Attr = *I.second;
 
@@ -2750,7 +2721,7 @@ static void emitAttributes(const RecordKeeper &Records, raw_ostream &OS,
     for (const auto &[R, _] : reverse(Supers)) {
       if (R->getName() != "TargetSpecificAttr" &&
           R->getName() != "DeclOrTypeAttr" && SuperName.empty())
-        SuperName = std::string(R->getName());
+        SuperName = R->getName().str();
       if (R->getName() == "InheritableAttr")
         Inheritable = true;
     }
@@ -3678,9 +3649,8 @@ static bool GenerateTargetSpecificAttrChecks(const Record *R,
 }
 
 static void GenerateHasAttrSpellingStringSwitch(
-    const std::vector<std::pair<const Record *, FlattenedSpelling>> &Attrs,
-    raw_ostream &OS, const std::string &Variety,
-    const std::string &Scope = "") {
+    ArrayRef<std::pair<const Record *, FlattenedSpelling>> Attrs,
+    raw_ostream &OS, StringRef Variety, StringRef Scope = "") {
   for (const auto &[Attr, Spelling] : Attrs) {
     // C++11-style attributes have specific version information associated with
     // them. If the attribute has no scope, the version information must not
@@ -3783,18 +3753,15 @@ void EmitClangAttrHasAttrImpl(const RecordKeeper &Records, raw_ostream &OS) {
 
   // Separate all of the attributes out into four group: generic, C++11, GNU,
   // and declspecs. Then generate a big switch statement for each of them.
-  std::vector<std::pair<const Record *, FlattenedSpelling>> Declspec, Microsoft,
-      GNU, Pragma, HLSLAnnotation;
-  std::map<std::string,
-           std::vector<std::pair<const Record *, FlattenedSpelling>>>
-      CXX, C23;
+  using PairTy = std::pair<const Record *, FlattenedSpelling>;
+  std::vector<PairTy> Declspec, Microsoft, GNU, Pragma, HLSLAnnotation;
+  std::map<StringRef, std::vector<PairTy>> CXX, C23;
 
   // Walk over the list of all attributes, and split them out based on the
   // spelling variety.
   for (auto *R : Records.getAllDerivedDefinitions("Attr")) {
-    std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(*R);
-    for (const auto &SI : Spellings) {
-      const std::string &Variety = SI.variety();
+    for (const FlattenedSpelling &SI : GetFlattenedSpellings(*R)) {
+      StringRef Variety = SI.variety();
       if (Variety == "GNU")
         GNU.emplace_back(R, SI);
       else if (Variety == "Declspec")
@@ -3829,22 +3796,16 @@ void EmitClangAttrHasAttrImpl(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "case AttributeCommonInfo::Syntax::AS_HLSLAnnotation:\n";
   OS << "  return llvm::StringSwitch<int>(Name)\n";
   GenerateHasAttrSpellingStringSwitch(HLSLAnnotation, OS, "HLSLAnnotation");
-  auto fn = [&OS](const char *Spelling,
-                  const std::map<
-                      std::string,
-                      std::vector<std::pair<const Record *, FlattenedSpelling>>>
-                      &List) {
+  auto fn = [&OS](StringRef Spelling,
+                  const std::map<StringRef, std::vector<PairTy>> &Map) {
     OS << "case AttributeCommonInfo::Syntax::AS_" << Spelling << ": {\n";
     // C++11-style attributes are further split out based on the Scope.
-    for (auto I = List.cbegin(), E = List.cend(); I != E; ++I) {
-      if (I != List.cbegin())
-        OS << " else ";
-      if (I->first.empty())
-        OS << "if (ScopeName == \"\") {\n";
-      else
-        OS << "if (ScopeName == \"" << I->first << "\") {\n";
+    ListSeparator LS(" else ");
+    for (const auto &[Scope, List] : Map) {
+      OS << LS;
+      OS << "if (ScopeName == \"" << Scope << "\") {\n";
       OS << "  return llvm::StringSwitch<int>(Name)\n";
-      GenerateHasAttrSpellingStringSwitch(I->second, OS, Spelling, I->first);
+      GenerateHasAttrSpellingStringSwitch(List, OS, Spelling, Scope);
       OS << "}";
     }
     OS << "\n} break;\n";
@@ -4086,9 +4047,9 @@ static void emitArgInfo(const Record &R, raw_ostream &OS) {
 }
 
 static std::string GetDiagnosticSpelling(const Record &R) {
-  std::string Ret = std::string(R.getValueAsString("DiagSpelling"));
+  StringRef Ret = R.getValueAsString("DiagSpelling");
   if (!Ret.empty())
-    return Ret;
+    return Ret.str();
 
   // If we couldn't find the DiagSpelling in this object, we can check to see
   // if the object is one that has a base, and if it is, loop up to the Base
@@ -4121,7 +4082,7 @@ static std::string CalculateDiagnostic(const Record &S) {
       SmallVector<StringRef, 2> Frags;
       SplitString(V, Frags, ",");
       for (auto Str : Frags) {
-        DiagList.push_back(std::string(Str.trim()));
+        DiagList.push_back(Str.trim().str());
       }
     }
   }
@@ -4152,7 +4113,7 @@ static std::string CalculateDiagnostic(const Record &S) {
 }
 
 static std::string GetSubjectWithSuffix(const Record *R) {
-  const std::string &B = std::string(R->getName());
+  const std::string B = R->getName().str();
   if (B == "DeclBase")
     return "Decl";
   return B + "Decl";
@@ -4424,7 +4385,7 @@ static void GenerateMutualExclusionsChecks(const Record &Attr,
     MergeStmtOS << "      auto Iter = llvm::find_if(C, [](const Attr *Check) "
                 << "{ return isa<";
     interleave(
-        StmtAttrs, [&](const std::string &Name) { MergeStmtOS << Name; },
+        StmtAttrs, [&](StringRef Name) { MergeStmtOS << Name; },
         [&] { MergeStmtOS << ", "; });
     MergeStmtOS << ">(Check); });\n";
     MergeStmtOS << "      if (Iter != C.end()) {\n";
@@ -4719,10 +4680,10 @@ void EmitClangAttrParsedAttrImpl(const RecordKeeper &Records, raw_ostream &OS) {
       OS << "static constexpr ParsedAttrInfo::Spelling " << I->first
          << "Spellings[] = {\n";
       for (const auto &S : Spellings) {
-        const std::string &RawSpelling = S.name();
+        StringRef RawSpelling = S.name();
         std::string Spelling;
         if (!S.nameSpace().empty())
-          Spelling += S.nameSpace() + "::";
+          Spelling += S.nameSpace().str() + "::";
         if (S.variety() == "GNU")
           Spelling += NormalizeGNUAttrSpelling(RawSpelling);
         else
@@ -4841,7 +4802,7 @@ void EmitClangAttrParsedAttrKinds(const RecordKeeper &Records,
 
   std::vector<StringMatcher::StringPair> GNU, Declspec, Microsoft, CXX11,
       Keywords, Pragma, C23, HLSLAnnotation;
-  std::set<std::string> Seen;
+  std::set<StringRef> Seen;
   for (const auto *A : Records.getAllDerivedDefinitions("Attr")) {
     const Record &Attr = *A;
 
@@ -4861,38 +4822,41 @@ void EmitClangAttrParsedAttrKinds(const RecordKeeper &Records,
       std::string AttrName;
       if (Attr.isSubClassOf("TargetSpecificAttr") &&
           !Attr.isValueUnset("ParseKind")) {
-        AttrName = std::string(Attr.getValueAsString("ParseKind"));
-        if (!Seen.insert(AttrName).second)
+        StringRef ParseKind = Attr.getValueAsString("ParseKind");
+        if (!Seen.insert(ParseKind).second)
           continue;
-      } else
-        AttrName = NormalizeAttrName(StringRef(Attr.getName())).str();
+        AttrName = ParseKind.str();
+      } else {
+        AttrName = NormalizeAttrName(Attr.getName()).str();
+      }
 
       std::vector<FlattenedSpelling> Spellings = GetFlattenedSpellings(Attr);
       for (const auto &S : Spellings) {
-        const std::string &RawSpelling = S.name();
+        StringRef RawSpelling = S.name();
         std::vector<StringMatcher::StringPair> *Matches = nullptr;
         std::string Spelling;
-        const std::string &Variety = S.variety();
+        StringRef Variety = S.variety();
         if (Variety == "CXX11") {
           Matches = &CXX11;
           if (!S.nameSpace().empty())
-            Spelling += S.nameSpace() + "::";
+            Spelling += S.nameSpace().str() + "::";
         } else if (Variety == "C23") {
           Matches = &C23;
           if (!S.nameSpace().empty())
-            Spelling += S.nameSpace() + "::";
-        } else if (Variety == "GNU")
+            Spelling += S.nameSpace().str() + "::";
+        } else if (Variety == "GNU") {
           Matches = &GNU;
-        else if (Variety == "Declspec")
+        } else if (Variety == "Declspec") {
           Matches = &Declspec;
-        else if (Variety == "Microsoft")
+        } else if (Variety == "Microsoft") {
           Matches = &Microsoft;
-        else if (Variety == "Keyword")
+        } else if (Variety == "Keyword") {
           Matches = &Keywords;
-        else if (Variety == "Pragma")
+        } else if (Variety == "Pragma") {
           Matches = &Pragma;
-        else if (Variety == "HLSLAnnotation")
+        } else if (Variety == "HLSLAnnotation") {
           Matches = &HLSLAnnotation;
+        }
 
         assert(Matches && "Unsupported spelling variety found");
 
@@ -5073,14 +5037,16 @@ class SpellingList {
             .Case("Pragma", SpellingKind::Pragma)
             .Case("HLSLAnnotation", SpellingKind::HLSLAnnotation);
     std::string Name;
-    if (!Spelling.nameSpace().empty()) {
+    StringRef NameSpace = Spelling.nameSpace();
+    if (!NameSpace.empty()) {
+      Name = NameSpace;
       switch (Kind) {
       case SpellingKind::CXX11:
       case SpellingKind::C23:
-        Name = Spelling.nameSpace() + "::";
+        Name += "::";
         break;
       case SpellingKind::Pragma:
-        Name = Spelling.nameSpace() + " ";
+        Name += " ";
         break;
       default:
         PrintFatalError(Attr.getLoc(), "Unexpected namespace in spelling");
@@ -5134,7 +5100,7 @@ GetAttributeHeadingAndSpellings(const Record &Documentation,
                     "documented");
 
   // Determine the heading to be used for this attribute.
-  std::string Heading = std::string(Documentation.getValueAsString("Heading"));
+  std::string Heading = Documentation.getValueAsString("Heading").str();
   if (Heading.empty()) {
     // If there's only one spelling, we can simply use that.
     if (Spellings.size() == 1)
@@ -5144,7 +5110,7 @@ GetAttributeHeadingAndSpellings(const Record &Documentation,
       for (auto I = Spellings.begin(), E = Spellings.end();
            I != E; ++I) {
         std::string Spelling =
-            std::string(NormalizeNameForSpellingComparison(I->name()));
+            NormalizeNameForSpellingComparison(I->name()).str();
         Uniques.insert(Spelling);
       }
       // If the semantic map has only one spelling, that is sufficient for our
diff --git a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
index 1a2503dcf660cfb..45a97425ef920aa 100644
--- a/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
+++ b/clang/utils/TableGen/ClangCommentCommandInfoEmitter.cpp
@@ -63,7 +63,7 @@ void clang::EmitClangCommentCommandInfo(const RecordKeeper &Records,
   std::vector<StringMatcher::StringPair> Matches;
   for (size_t i = 0, e = Tags.size(); i != e; ++i) {
     const Record &Tag = *Tags[i];
-    std::string Name = std::string(Tag.getValueAsString("Name"));
+    std::string Name = Tag.getValueAsString("Name").str();
     std::string Return;
     raw_string_ostream(Return) << "return &Commands[" << i << "];";
     Matches.emplace_back(std::move(Name), std::move(Return));
diff --git a/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp b/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
index bd75b3f6b652a16..2d615760814e01b 100644
--- a/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
+++ b/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
@@ -51,7 +51,7 @@ void clang::EmitClangCommentHTMLNamedCharacterReferences(
   std::vector<StringMatcher::StringPair> NameToUTF8;
   SmallString<32> CLiteral;
   for (const Record *Tag : Records.getAllDerivedDefinitions("NCR")) {
-    std::string Spelling = std::string(Tag->getValueAsString("Spelling"));
+    std::string Spelling = Tag->getValueAsString("Spelling").str();
     uint64_t CodePoint = Tag->getValueAsInt("CodePoint");
     CLiteral.clear();
     CLiteral.append("return ");
diff --git a/clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp b/clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp
index a457315bc62c5c7..7d65cfe0d3f5292 100644
--- a/clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp
+++ b/clang/utils/TableGen/ClangCommentHTMLTagsEmitter.cpp
@@ -24,7 +24,7 @@ void clang::EmitClangCommentHTMLTags(const RecordKeeper &Records,
   ArrayRef<const Record *> Tags = Records.getAllDerivedDefinitions("Tag");
   std::vector<StringMatcher::StringPair> Matches;
   for (const Record *Tag : Tags) {
-    Matches.emplace_back(std::string(Tag->getValueAsString("Spelling")),
+    Matches.emplace_back(Tag->getValueAsString("Spelling").str(),
                          "return true;");
   }
 
@@ -42,7 +42,7 @@ void clang::EmitClangCommentHTMLTagsProperties(const RecordKeeper &Records,
   std::vector<StringMatcher::StringPair> MatchesEndTagOptional;
   std::vector<StringMatcher::StringPair> MatchesEndTagForbidden;
   for (const Record *Tag : Tags) {
-    std::string Spelling = std::string(Tag->getValueAsString("Spelling"));
+    std::string Spelling = Tag->getValueAsString("Spelling").str();
     StringMatcher::StringPair Match(Spelling, "return true;");
     if (Tag->getValueAsBit("EndTagOptional"))
       MatchesEndTagOptional.push_back(Match);
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 35c8fcf69910b6a..c6d82646b40de24 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -2588,11 +2588,66 @@ void NeonEmitter::runVectorTypes(raw_ostream &OS) {
   OS << "typedef __fp16 float16_t;\n";
 
   OS << "#if defined(__aarch64__) || defined(__arm64ec__)\n";
+  OS << "typedef __MFloat8_t __mfp8;\n";
   OS << "typedef __MFloat8x8_t mfloat8x8_t;\n";
   OS << "typedef __MFloat8x16_t mfloat8x16_t;\n";
   OS << "typedef double float64_t;\n";
   OS << "#endif\n\n";
 
+  OS << R"(
+typedef uint64_t fpm_t;
+
+enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 };
+
+enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE };
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_fpm_init(void) {
+  return 0;
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
+  return (__fpm & ~7ull) | (fpm_t)__format;
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
+  return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u);
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) {
+  return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u);
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
+  return (__fpm & ~0x4000ull) | ((fpm_t)__behaviour << 14u);
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) {
+  return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u);
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) {
+  return (__fpm & ~0x7f0000ull) | (__scale << 16u);
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) {
+  return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u);
+}
+
+static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__))
+__arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) {
+  return (uint32_t)__fpm | (__scale << 32u);
+}
+
+)";
+
   emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQd", OS);
 
   emitNeonTypeDefs("bQb", OS);
diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html
index 82ba9b370ba5953..186f7cc0ace5465 100755
--- a/clang/www/cxx_dr_status.html
+++ b/clang/www/cxx_dr_status.html
@@ -1721,7 +1721,7 @@ <h2 id="cxxdr">C++ defect report implementation status</h2>
     <td><a href="https://cplusplus.github.io/CWG/issues/279.html">279</a></td>
     <td>CD6</td>
     <td>Correspondence of "names for linkage purposes"</td>
-    <td class="unknown" align="center">Unknown</td>
+    <td class="none" align="center">No</td>
   </tr>
   <tr id="280">
     <td><a href="https://cplusplus.github.io/CWG/issues/280.html">280</a></td>
@@ -2075,7 +2075,7 @@ <h2 id="cxxdr">C++ defect report implementation status</h2>
     <td><a href="https://cplusplus.github.io/CWG/issues/338.html">338</a></td>
     <td>CD6</td>
     <td>Enumerator name with linkage used as class name in other translation unit</td>
-    <td class="unknown" align="center">Unknown</td>
+    <td class="partial" align="center">Duplicate of <a href="#1884">1884</a></td>
   </tr>
   <tr id="339">
     <td><a href="https://cplusplus.github.io/CWG/issues/339.html">339</a></td>
@@ -11131,7 +11131,7 @@ <h2 id="cxxdr">C++ defect report implementation status</h2>
     <td><a href="https://cplusplus.github.io/CWG/issues/1884.html">1884</a></td>
     <td>CD6</td>
     <td>Unclear requirements for same-named external-linkage entities</td>
-    <td class="unknown" align="center">Unknown</td>
+    <td class="partial" align="center">Partial</td>
   </tr>
   <tr id="1885">
     <td><a href="https://cplusplus.github.io/CWG/issues/1885.html">1885</a></td>
@@ -11219,7 +11219,7 @@ <h2 id="cxxdr">C++ defect report implementation status</h2>
     <td><a href="https://cplusplus.github.io/CWG/issues/1898.html">1898</a></td>
     <td>CD6</td>
     <td>Use of &#8220;equivalent&#8221; in overload resolution</td>
-    <td class="unknown" align="center">Unknown</td>
+    <td class="full" align="center">Clang 2.7</td>
   </tr>
   <tr id="1899">
     <td><a href="https://cplusplus.github.io/CWG/issues/1899.html">1899</a></td>
diff --git a/clang/www/index.html b/clang/www/index.html
index 95bbfa86172ba40..465a6a6d29dc765 100755
--- a/clang/www/index.html
+++ b/clang/www/index.html
@@ -16,8 +16,8 @@ <h1>Clang: a C language family frontend for LLVM</h1>
   <!--*********************************************************************-->
 
   <p>The Clang project provides a language front-end and tooling infrastructure
-  for languages in the C language family (C, C++, Objective C/C++, OpenCL,
-  CUDA, and RenderScript) for the <a href="https://www.llvm.org/">LLVM</a>
+  for languages in the C language family (C, C++, Objective C/C++, OpenCL, and
+  CUDA) for the <a href="https://www.llvm.org/">LLVM</a>
   project. Both a GCC-compatible compiler driver (<tt>clang</tt>) and an
   MSVC-compatible compiler driver (<tt>clang-cl.exe</tt>) are provided. You
   can <a href="get_started.html">get and build</a> the source today.</p>
diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
index b2f33d1a961c747..e3d81d241b10542 100644
--- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake
+++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
@@ -773,7 +773,6 @@ function(configure_compiler_rt_lit_site_cfg input output)
 
   string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_TEST_COMPILER ${COMPILER_RT_TEST_COMPILER})
   string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_OUTPUT_DIR ${COMPILER_RT_OUTPUT_DIR})
-  string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_EXEC_OUTPUT_DIR ${COMPILER_RT_EXEC_OUTPUT_DIR})
   string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR ${output_dir})
 
   configure_lit_site_cfg(${input} ${output})
diff --git a/compiler-rt/include/fuzzer/FuzzedDataProvider.h b/compiler-rt/include/fuzzer/FuzzedDataProvider.h
index 5903ed837917ca9..e57b95b6304a9a0 100644
--- a/compiler-rt/include/fuzzer/FuzzedDataProvider.h
+++ b/compiler-rt/include/fuzzer/FuzzedDataProvider.h
@@ -18,6 +18,7 @@
 #include <climits>
 #include <cstddef>
 #include <cstdint>
+#include <cstdlib>
 #include <cstring>
 #include <initializer_list>
 #include <limits>
diff --git a/compiler-rt/lib/asan/scripts/asan_symbolize.py b/compiler-rt/lib/asan/scripts/asan_symbolize.py
index b08769614aeb18f..058a1614b55e6a7 100755
--- a/compiler-rt/lib/asan/scripts/asan_symbolize.py
+++ b/compiler-rt/lib/asan/scripts/asan_symbolize.py
@@ -316,7 +316,7 @@ def symbolize(self, addr, binary, offset):
         #   * For C functions atos omits parentheses and argument types.
         #   * For C++ functions the function name (i.e., `foo` above) may contain
         #     templates which may contain parentheses.
-        match = re.match("^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line)
+        match = re.match(r"^(.*) \(in (.*)\) \((.*:\d*)\)$", atos_line)
         logging.debug("atos_line: %s", atos_line)
         if match:
             function_name = match.group(1)
@@ -541,7 +541,7 @@ def process_line_posix(self, line):
         # names in the regex because it could be an
         # Objective-C or C++ demangled name.
         stack_trace_line_format = (
-            "^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)"
+            r"^( *#([0-9]+) *)(0x[0-9a-f]+) *(?:in *.+)? *\((.*)\+(0x[0-9a-f]+)\)"
         )
         match = re.match(stack_trace_line_format, line)
         if not match:
diff --git a/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc b/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc
index 902fa8f79ab8164..e454524c9cb6a23 100644
--- a/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc
+++ b/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc
@@ -53,9 +53,9 @@ enum CPUFeatures {
   FEAT_EBF16,
   FEAT_RPRES,
   FEAT_SVE,
-  FEAT_SVE_BF16,
-  FEAT_SVE_EBF16,
-  FEAT_SVE_I8MM,
+  RESERVED_FEAT_SVE_BF16,  // previously used and now ABI legacy
+  RESERVED_FEAT_SVE_EBF16, // previously used and now ABI legacy
+  RESERVED_FEAT_SVE_I8MM,  // previously used and now ABI legacy
   FEAT_SVE_F32MM,
   FEAT_SVE_F64MM,
   FEAT_SVE2,
diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
index 0c76a4fe9b9f2f2..4e25feb2e90c635 100644
--- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
+++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc
@@ -65,14 +65,10 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
     setCPUFeature(FEAT_I8MM);
   if (hwcap2 & HWCAP2_EBF16)
     setCPUFeature(FEAT_EBF16);
-  if (hwcap2 & HWCAP2_SVE_EBF16)
-    setCPUFeature(FEAT_SVE_EBF16);
   if (hwcap2 & HWCAP2_DGH)
     setCPUFeature(FEAT_DGH);
   if (hwcap2 & HWCAP2_FRINT)
     setCPUFeature(FEAT_FRINTTS);
-  if (hwcap2 & HWCAP2_SVEI8MM)
-    setCPUFeature(FEAT_SVE_I8MM);
   if (hwcap2 & HWCAP2_SVEF32MM)
     setCPUFeature(FEAT_SVE_F32MM);
   if (hwcap2 & HWCAP2_SVEF64MM)
@@ -119,8 +115,6 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
     setCPUFeature(FEAT_RCPC3);
   if (hwcap2 & HWCAP2_BF16)
     setCPUFeature(FEAT_BF16);
-  if (hwcap2 & HWCAP2_SVEBF16)
-    setCPUFeature(FEAT_SVE_BF16);
   if (hwcap & HWCAP_SVE)
     setCPUFeature(FEAT_SVE);
   if (hwcap2 & HWCAP2_SVE2)
diff --git a/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h b/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h
index e8011014c2331d7..d92b51052194275 100644
--- a/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h
+++ b/compiler-rt/lib/hwasan/hwasan_platform_interceptors.h
@@ -200,9 +200,6 @@
 #undef SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID
 #define SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID 0
 
-#undef SANITIZER_INTERCEPT_TIMER_CREATE
-#define SANITIZER_INTERCEPT_TIMER_CREATE 0
-
 #undef SANITIZER_INTERCEPT_GETITIMER
 #define SANITIZER_INTERCEPT_GETITIMER 0
 
diff --git a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
index d1b2857ccd8156f..efca6b82809b970 100755
--- a/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
+++ b/compiler-rt/lib/hwasan/scripts/hwasan_symbolize
@@ -316,7 +316,7 @@ class Symbolizer:
       self.__last_access_tag = int(match.group(2), 16)
 
   def process_tag_dump_line(self, line, ignore_tags=False):
-    m = re.match(r'.*?(0x[0-9a-f]+):' + '([ ]*[\[ ][0-9a-f][0-9a-f]\]?)' * 16, line)
+    m = re.match(r'.*?(0x[0-9a-f]+):' + r'([ ]*[\[ ][0-9a-f][0-9a-f]\]?)' * 16, line)
     if m is None:
       return False
     addr = m.group(1)
diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp
index ad265acf4c1e39a..41b99fabe84f478 100644
--- a/compiler-rt/lib/msan/tests/msan_test.cpp
+++ b/compiler-rt/lib/msan/tests/msan_test.cpp
@@ -4881,27 +4881,4 @@ TEST(MemorySanitizer, throw_catch) {
     // pass
   }
 }
-
-#if defined(__linux__)
-TEST(MemorySanitizer, timer_create) {
-  timer_t timer;
-  EXPECT_POISONED(timer);
-  int res = timer_create(CLOCK_REALTIME, nullptr, &timer);
-  ASSERT_EQ(0, res);
-  EXPECT_NOT_POISONED(timer);
-
-  // Make sure the timer is usable.
-  struct itimerspec cur_value {};
-  cur_value.it_value.tv_sec = 1;
-  EXPECT_EQ(0, timer_settime(timer, 0, &cur_value, nullptr));
-
-  timer_t timer2;
-  EXPECT_POISONED(timer2);
-  // Use an invalid clock_id to make timer_create fail.
-  res = timer_create(INT_MAX, nullptr, &timer2);
-  ASSERT_EQ(-1, res);
-  EXPECT_POISONED(timer2);
-  timer_delete(timer);
-}
-#endif
 } // namespace
diff --git a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp
index 9e455f0326a549e..ed9ee4ded7b0598 100644
--- a/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp
+++ b/compiler-rt/lib/rtsan/tests/rtsan_test_functional.cpp
@@ -204,11 +204,11 @@ TEST(TestRtsan, ThrowingAnExceptionDiesWhenRealtime) {
 
 TEST(TestRtsan, DoesNotDieIfTurnedOff) {
   std::mutex mutex;
-  auto RealtimeUnsafeFunc = [&]() {
+  auto RealtimeBlockingFunc = [&]() {
     __rtsan_disable();
     mutex.lock();
     mutex.unlock();
     __rtsan_enable();
   };
-  RealtimeInvoke(RealtimeUnsafeFunc);
+  RealtimeInvoke(RealtimeBlockingFunc);
 }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
index 211f9f70d7e4c6c..b8627f8557afe29 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc
@@ -2289,24 +2289,6 @@ INTERCEPTOR(int, pthread_getcpuclockid, uptr thread,
 #define INIT_CLOCK_GETCPUCLOCKID
 #endif
 
-#if SANITIZER_INTERCEPT_TIMER_CREATE
-INTERCEPTOR(int, timer_create, __sanitizer_clockid_t clockid, void *sevp,
-            __sanitizer_timer_t *timer) {
-  void *ctx;
-  COMMON_INTERCEPTOR_ENTER(ctx, timer_create, clockid, sevp, timer);
-  int res = REAL(timer_create)(clockid, sevp, timer);
-  if (!res && timer) {
-    COMMON_INTERCEPTOR_WRITE_RANGE(ctx, timer, sizeof *timer);
-  }
-  return res;
-}
-
-#  define INIT_TIMER_CREATE \
-    COMMON_INTERCEPT_FUNCTION_GLIBC_VER_MIN(timer_create, "GLIBC_2.3.3");
-#else
-#  define INIT_TIMER_CREATE
-#endif
-
 #if SANITIZER_INTERCEPT_GETITIMER
 INTERCEPTOR(int, getitimer, int which, void *curr_value) {
   void *ctx;
@@ -10284,7 +10266,6 @@ static void InitializeCommonInterceptors() {
   INIT_SETPWENT;
   INIT_CLOCK_GETTIME;
   INIT_CLOCK_GETCPUCLOCKID;
-  INIT_TIMER_CREATE;
   INIT_GETITIMER;
   INIT_TIME;
   INIT_GLOB;
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
index 36fafdc642642bf..6959a6d52d604e0 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -237,9 +237,6 @@
   (SI_FREEBSD || SI_NETBSD || SI_LINUX || SI_SOLARIS)
 #define SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID \
   (SI_LINUX || SI_FREEBSD || SI_NETBSD)
-// TODO: This should be SI_POSIX, adding Linux first until I have time
-// to verify all timer_t typedefs on other platforms.
-#define SANITIZER_INTERCEPT_TIMER_CREATE SI_LINUX
 #define SANITIZER_INTERCEPT_GETITIMER SI_POSIX
 #define SANITIZER_INTERCEPT_TIME SI_POSIX
 #define SANITIZER_INTERCEPT_GLOB (SI_GLIBC || SI_SOLARIS)
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
index b4ccf7b3d7bef48..e8c81aa8e281637 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_posix.h
@@ -1517,10 +1517,6 @@ extern const int si_SEGV_ACCERR;
 
 #define SIGACTION_SYMNAME sigaction
 
-#  if SANITIZER_LINUX
-typedef void *__sanitizer_timer_t;
-#  endif
-
 #endif  // SANITIZER_LINUX || SANITIZER_APPLE
 
 #endif
diff --git a/compiler-rt/test/hwasan/TestCases/many-threads-uaf.c b/compiler-rt/test/hwasan/TestCases/many-threads-uaf.c
index 8fa07861371d56d..e02ab5b28ce046c 100644
--- a/compiler-rt/test/hwasan/TestCases/many-threads-uaf.c
+++ b/compiler-rt/test/hwasan/TestCases/many-threads-uaf.c
@@ -23,6 +23,7 @@ void *BoringThread(void *arg) {
 void *UAFThread(void *arg) {
   char * volatile x = (char*)malloc(10);
   fprintf(stderr, "ZZZ %p\n", x);
+  fflush(stderr);
   free(x);
   x[5] = 42;
   // CHECK: ERROR: HWAddressSanitizer: tag-mismatch on address
diff --git a/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c b/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c
index 78bef538af11610..da1cb6869692066 100644
--- a/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c
+++ b/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c
@@ -21,6 +21,7 @@ int main() {
   memcpy(Q, P, 32);
 #endif
   write(STDOUT_FILENO, "recovered\n", 10);
+  fflush(stdout);
   // WRITE: ERROR: HWAddressSanitizer: tag-mismatch on address
   // WRITE: WRITE of size 32 at {{.*}} tags: [[PTR_TAG:..]]/[[MEM_TAG:..]] (ptr/mem)
   // WRITE: Invalid access starting at offset 16
diff --git a/compiler-rt/test/hwasan/TestCases/use-after-free.c b/compiler-rt/test/hwasan/TestCases/use-after-free.c
index 070622f560a2255..fe4f8b32ea10060 100644
--- a/compiler-rt/test/hwasan/TestCases/use-after-free.c
+++ b/compiler-rt/test/hwasan/TestCases/use-after-free.c
@@ -15,6 +15,7 @@ int main() {
   free(x);
   __hwasan_disable_allocator_tagging();
   fprintf(stderr, ISREAD ? "Going to do a READ\n" : "Going to do a WRITE\n");
+  fflush(stderr);
   // CHECK: Going to do a [[TYPE:[A-Z]*]]
   int r = 0;
   if (ISREAD) r = x[5]; else x[5] = 42;  // should be on the same line.
@@ -31,11 +32,11 @@ int main() {
   //
   // CHECK: freed by thread {{.*}} here:
   // CHECK: #0 {{.*}} in {{.*}}free{{.*}} {{.*}}hwasan_allocation_functions.cpp
-  // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-19]]
+  // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-20]]
 
   // CHECK: previously allocated by thread {{.*}} here:
   // CHECK: #0 {{.*}} in {{.*}}malloc{{.*}} {{.*}}hwasan_allocation_functions.cpp
-  // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-24]]
+  // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-25]]
   // CHECK: Memory tags around the buggy address (one tag corresponds to 16 bytes):
   // CHECK: =>{{.*}}[[MEM_TAG]]
   // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in main
diff --git a/compiler-rt/test/hwasan/lit.cfg.py b/compiler-rt/test/hwasan/lit.cfg.py
index bbf23e683240ac4..594f3294a84ac17 100644
--- a/compiler-rt/test/hwasan/lit.cfg.py
+++ b/compiler-rt/test/hwasan/lit.cfg.py
@@ -2,9 +2,6 @@
 
 import os
 
-from lit.llvm import llvm_config
-from lit.llvm.subst import ToolSubst, FindTool
-
 # Setup config name.
 config.name = "HWAddressSanitizer" + getattr(config, "name_suffix", "default")
 
@@ -77,12 +74,6 @@ def build_invocation(compile_flags):
     ("%env_hwasan_opts=", "env HWASAN_OPTIONS=" + default_hwasan_opts_str)
 )
 
-# Ensure that we can use hwasan_symbolize from the expected location
-llvm_config.add_tool_substitutions(
-    [ToolSubst("hwasan_symbolize", unresolved="fatal")],
-    search_dirs=[config.compiler_rt_bindir],
-)
-
 # Default test suffixes.
 config.suffixes = [".c", ".cpp"]
 
diff --git a/compiler-rt/test/lit.common.configured.in b/compiler-rt/test/lit.common.configured.in
index 050792b6b262175..66935c358afeddb 100644
--- a/compiler-rt/test/lit.common.configured.in
+++ b/compiler-rt/test/lit.common.configured.in
@@ -28,7 +28,6 @@ set_default("python_executable", "@Python3_EXECUTABLE@")
 set_default("compiler_rt_debug", @COMPILER_RT_DEBUG_PYBOOL@)
 set_default("compiler_rt_intercept_libdispatch", @COMPILER_RT_INTERCEPT_LIBDISPATCH_PYBOOL@)
 set_default("compiler_rt_output_dir", "@COMPILER_RT_RESOLVED_OUTPUT_DIR@")
-set_default("compiler_rt_bindir", "@COMPILER_RT_RESOLVED_EXEC_OUTPUT_DIR@")
 set_default("compiler_rt_libdir", "@COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR@")
 set_default("emulator", "@COMPILER_RT_EMULATOR@")
 set_default("asan_shadow_scale", "@COMPILER_RT_ASAN_SHADOW_SCALE@")
diff --git a/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp b/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp
index 92f3c29e970d42c..0dd721571de9b8a 100644
--- a/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp
+++ b/compiler-rt/test/xray/TestCases/Posix/clang-xray-shared.cpp
@@ -1,6 +1,6 @@
 // Test that the DSO-local runtime library has been linked if -fxray-shared is passed.
 //
-// RUN: %clangxx -fxray-instrument -fxray-shared %s -shared -o %t.so
+// RUN: %clangxx -fxray-instrument -fxray-shared -fPIC %s -shared -o %t.so
 // RUN: llvm-nm %t.so | FileCheck %s --check-prefix ENABLED
 
 // RUN: %clangxx -fxray-instrument %s -shared -o %t.so
diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp
index 06ca12a492d29b0..62f8d39a8abaa5f 100644
--- a/flang/examples/FeatureList/FeatureList.cpp
+++ b/flang/examples/FeatureList/FeatureList.cpp
@@ -473,8 +473,8 @@ struct NodeVisitor {
   READ_FEATURE(OmpDependClause::InOut)
   READ_FEATURE(OmpDependClause::Sink)
   READ_FEATURE(OmpDependClause::Source)
-  READ_FEATURE(OmpDependenceType)
-  READ_FEATURE(OmpDependenceType::Type)
+  READ_FEATURE(OmpTaskDependenceType)
+  READ_FEATURE(OmpTaskDependenceType::Type)
   READ_FEATURE(OmpDependSinkVec)
   READ_FEATURE(OmpDependSinkVecLength)
   READ_FEATURE(OmpEndAllocators)
@@ -483,6 +483,8 @@ struct NodeVisitor {
   READ_FEATURE(OmpEndCriticalDirective)
   READ_FEATURE(OmpEndLoopDirective)
   READ_FEATURE(OmpEndSectionsDirective)
+  READ_FEATURE(OmpGrainsizeClause)
+  READ_FEATURE(OmpGrainsizeClause::Prescriptiveness)
   READ_FEATURE(OmpIfClause)
   READ_FEATURE(OmpIfClause::DirectiveNameModifier)
   READ_FEATURE(OmpLinearClause)
@@ -494,6 +496,8 @@ struct NodeVisitor {
   READ_FEATURE(OmpMapClause)
   READ_FEATURE(OmpMapClause::TypeModifier)
   READ_FEATURE(OmpMapClause::Type)
+  READ_FEATURE(OmpNumTasksClause)
+  READ_FEATURE(OmpNumTasksClause::Prescriptiveness)
   READ_FEATURE(OmpObject)
   READ_FEATURE(OmpObjectList)
   READ_FEATURE(OmpOrderClause)
diff --git a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
index 5d3c5cd72eef04d..d28ed0534d60026 100644
--- a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
+++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
@@ -222,9 +222,9 @@ void OpenMPCounterVisitor::Post(const OmpLinearModifier::Type &c) {
   clauseDetails +=
       "modifier=" + std::string{OmpLinearModifier::EnumToString(c)} + ";";
 }
-void OpenMPCounterVisitor::Post(const OmpDependenceType::Type &c) {
+void OpenMPCounterVisitor::Post(const OmpTaskDependenceType::Type &c) {
   clauseDetails +=
-      "type=" + std::string{OmpDependenceType::EnumToString(c)} + ";";
+      "type=" + std::string{OmpTaskDependenceType::EnumToString(c)} + ";";
 }
 void OpenMPCounterVisitor::Post(const OmpMapClause::Type &c) {
   clauseDetails += "type=" + std::string{OmpMapClause::EnumToString(c)} + ";";
diff --git a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h
index 380534ebbfd70ac..68c52db46e2f008 100644
--- a/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h
+++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h
@@ -73,7 +73,7 @@ struct OpenMPCounterVisitor {
   void Post(const OmpDeviceTypeClause::Type &c);
   void Post(const OmpScheduleModifierType::ModType &c);
   void Post(const OmpLinearModifier::Type &c);
-  void Post(const OmpDependenceType::Type &c);
+  void Post(const OmpTaskDependenceType::Type &c);
   void Post(const OmpMapClause::Type &c);
   void Post(const OmpScheduleClause::ScheduleType &c);
   void Post(const OmpIfClause::DirectiveNameModifier &c);
diff --git a/flang/include/flang/Frontend/TargetOptions.h b/flang/include/flang/Frontend/TargetOptions.h
index 332adcbe6b6ac35..01c878067b921dc 100644
--- a/flang/include/flang/Frontend/TargetOptions.h
+++ b/flang/include/flang/Frontend/TargetOptions.h
@@ -44,6 +44,9 @@ class TargetOptions {
 
   /// The integer KINDs disabled for this target
   std::vector<int> disabledIntegerKinds;
+
+  /// Enable the extended Altivec ABI on AIX (set by -mabi=vec-extabi).
+  bool EnableAIXExtendedAltivecABI = false;
 };
 
 } // end namespace Fortran::frontend
diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def
index 0b22e54b648e94a..231de533fbd30ac 100644
--- a/flang/include/flang/Lower/LoweringOptions.def
+++ b/flang/include/flang/Lower/LoweringOptions.def
@@ -38,5 +38,10 @@ ENUM_LOWERINGOPT(Underscoring, unsigned, 1, 1)
 /// (i.e. wraps around as two's complement). Off by default.
 ENUM_LOWERINGOPT(IntegerWrapAround, unsigned, 1, 0)
 
+/// If true, add nsw flags to loop variable increments.
+/// Off by default.
+/// TODO: integrate this option with the above
+ENUM_LOWERINGOPT(NSWOnLoopVarInc, unsigned, 1, 0)
+
 #undef LOWERINGOPT
 #undef ENUM_LOWERINGOPT
diff --git a/flang/include/flang/Optimizer/Transforms/CUFCommon.h b/flang/include/flang/Optimizer/Transforms/CUFCommon.h
new file mode 100644
index 000000000000000..b88133489df5e24
--- /dev/null
+++ b/flang/include/flang/Optimizer/Transforms/CUFCommon.h
@@ -0,0 +1,26 @@
+//===-- CUFCommon.h -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_TRANSFORMS_CUFCOMMON_H_
+#define FORTRAN_OPTIMIZER_TRANSFORMS_CUFCOMMON_H_
+
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "llvm/ADT/StringRef.h"
+
+inline constexpr llvm::StringRef cudaDeviceModuleName = "cuda_device_mod";
+
+namespace cuf {
+
+/// Retrieve or create the CUDA Fortran GPU module in the given \p mod.
+mlir::gpu::GPUModuleOp getOrCreateGPUModule(mlir::ModuleOp mod,
+                                            mlir::SymbolTable &symTab);
+
+} // namespace cuf
+
+#endif // FORTRAN_OPTIMIZER_TRANSFORMS_CUFCOMMON_H_
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index e1453cfa374bfc4..e8f0a8444a31a1e 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -66,7 +66,7 @@ namespace fir {
 std::unique_ptr<mlir::Pass> createAffineDemotionPass();
 std::unique_ptr<mlir::Pass>
 createArrayValueCopyPass(fir::ArrayValueCopyOptions options = {});
-std::unique_ptr<mlir::Pass> createCFGConversionPassWithoutNSW();
+std::unique_ptr<mlir::Pass> createCFGConversionPassWithNSW();
 std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
 std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
 std::unique_ptr<mlir::Pass>
@@ -83,7 +83,7 @@ createVScaleAttrPass(std::pair<unsigned, unsigned> vscaleAttr);
 
 void populateCfgConversionRewrites(mlir::RewritePatternSet &patterns,
                                    bool forceLoopToExecuteOnce = false,
-                                   bool setNSW = true);
+                                   bool setNSW = false);
 
 // declarative passes
 #define GEN_PASS_REGISTRATION
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index 54b43adbfc6bbf0..a41f0f348f27a65 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -153,7 +153,7 @@ def CFGConversion : Pass<"cfg-conversion"> {
            /*default=*/"false",
            "force the body of a loop to execute at least once">,
     Option<"setNSW", "set-nsw", "bool",
-           /*default=*/"true",
+           /*default=*/"false",
            "set nsw on loop variable increment">
   ];
 }
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index 76d2f164fc4bf00..31ad1b7c6ce5b53 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -513,8 +513,8 @@ class ParseTreeDumper {
   NODE(OmpDependClause, InOut)
   NODE(OmpDependClause, Sink)
   NODE(OmpDependClause, Source)
-  NODE(parser, OmpDependenceType)
-  NODE_ENUM(OmpDependenceType, Type)
+  NODE(parser, OmpTaskDependenceType)
+  NODE_ENUM(OmpTaskDependenceType, Type)
   NODE(parser, OmpDependSinkVec)
   NODE(parser, OmpDependSinkVecLength)
   NODE(parser, OmpEndAllocators)
@@ -547,6 +547,10 @@ class ParseTreeDumper {
   NODE_ENUM(OmpOrderClause, Type)
   NODE(parser, OmpOrderModifier)
   NODE_ENUM(OmpOrderModifier, Kind)
+  NODE(parser, OmpGrainsizeClause)
+  NODE_ENUM(OmpGrainsizeClause, Prescriptiveness)
+  NODE(parser, OmpNumTasksClause)
+  NODE_ENUM(OmpNumTasksClause, Prescriptiveness)
   NODE(parser, OmpProcBindClause)
   NODE_ENUM(OmpProcBindClause, Type)
   NODE_ENUM(OmpReductionClause, ReductionModifier)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index c1884f6e88d1ec8..174f4c631e9d4cd 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -3439,6 +3439,18 @@ struct OmpObject {
 
 WRAPPER_CLASS(OmpObjectList, std::list<OmpObject>);
 
+// Ref: [4.5:169-170], [5.0:254-256], [5.1:287-289], [5.2:321]
+//
+// task-dependence-type -> // "dependence-type" in 5.1 and before
+//    IN | OUT | INOUT |        // since 4.5
+//    SOURCE | SINK |           // since 4.5, until 5.1
+//    MUTEXINOUTSET | DEPOBJ |  // since 5.0
+//    INOUTSET                  // since 5.2
+struct OmpTaskDependenceType {
+  ENUM_CLASS(Type, In, Out, Inout, Source, Sink)
+  WRAPPER_CLASS_BOILERPLATE(OmpTaskDependenceType, Type);
+};
+
 // [5.0] 2.1.6 iterator-specifier -> type-declaration-stmt = subscript-triple
 //             iterator-modifier -> iterator-specifier-list
 struct OmpIteratorSpecifier {
@@ -3534,31 +3546,40 @@ struct OmpDependSinkVecLength {
   std::tuple<DefinedOperator, ScalarIntConstantExpr> t;
 };
 
-// 2.13.9 depend-vec -> iterator [+/- depend-vec-length],...,iterator[...]
+// 2.13.9 depend-vec -> induction-variable [depend-vec-length], ...
 struct OmpDependSinkVec {
   TUPLE_CLASS_BOILERPLATE(OmpDependSinkVec);
   std::tuple<Name, std::optional<OmpDependSinkVecLength>> t;
 };
 
-// 2.13.9 depend-type -> IN | OUT | INOUT | SOURCE | SINK
-struct OmpDependenceType {
-  ENUM_CLASS(Type, In, Out, Inout, Source, Sink)
-  WRAPPER_CLASS_BOILERPLATE(OmpDependenceType, Type);
-};
-
-// 2.13.9 depend-clause -> DEPEND (((IN | OUT | INOUT) : variable-name-list) |
-//                                 SOURCE | SINK : depend-vec)
+// Ref: [4.5:169-170], [5.0:255-256], [5.1:288-289], [5.2:323-324]
+//
+// depend-clause ->
+//    DEPEND(SOURCE) |                               // since 4.5, until 5.1
+//    DEPEND(SINK: depend-vec) |                     // since 4.5, until 5.1
+//    DEPEND([depend-modifier,]dependence-type: locator-list)   // since 4.5
+//
+// depend-modifier -> iterator-modifier              // since 5.0
 struct OmpDependClause {
   UNION_CLASS_BOILERPLATE(OmpDependClause);
   EMPTY_CLASS(Source);
   WRAPPER_CLASS(Sink, std::list<OmpDependSinkVec>);
   struct InOut {
     TUPLE_CLASS_BOILERPLATE(InOut);
-    std::tuple<OmpDependenceType, std::list<Designator>> t;
+    std::tuple<std::optional<OmpIteratorModifier>, OmpTaskDependenceType,
+        OmpObjectList>
+        t;
   };
   std::variant<Source, Sink, InOut> u;
 };
 
+// OMP 5.2 12.6.1 grainsize-clause -> grainsize ([prescriptiveness :] value)
+struct OmpGrainsizeClause {
+  TUPLE_CLASS_BOILERPLATE(OmpGrainsizeClause);
+  ENUM_CLASS(Prescriptiveness, Strict);
+  std::tuple<std::optional<Prescriptiveness>, ScalarIntExpr> t;
+};
+
 // 2.12 if-clause -> IF ([ directive-name-modifier :] scalar-logical-expr)
 struct OmpIfClause {
   TUPLE_CLASS_BOILERPLATE(OmpIfClause);
@@ -3688,6 +3709,13 @@ struct OmpScheduleClause {
       t;
 };
 
+// OMP 5.2 12.6.2 num_tasks-clause -> num_tasks ([prescriptiveness :] value)
+struct OmpNumTasksClause {
+  TUPLE_CLASS_BOILERPLATE(OmpNumTasksClause);
+  ENUM_CLASS(Prescriptiveness, Strict);
+  std::tuple<std::optional<Prescriptiveness>, ScalarIntExpr> t;
+};
+
 // OpenMP Clauses
 struct OmpClause {
   UNION_CLASS_BOILERPLATE(OmpClause);
diff --git a/flang/include/flang/Runtime/CUDA/kernel.h b/flang/include/flang/Runtime/CUDA/kernel.h
index cf07d874a082c0b..85afda09e347ae4 100644
--- a/flang/include/flang/Runtime/CUDA/kernel.h
+++ b/flang/include/flang/Runtime/CUDA/kernel.h
@@ -15,13 +15,19 @@
 
 extern "C" {
 
-// This function uses intptr_t instead of CUDA's unsigned int to match
+// These functions use intptr_t instead of CUDA's unsigned int to match
 // the type of MLIR's index type. This avoids the need for casts in the
 // generated MLIR code.
+
 void RTDEF(CUFLaunchKernel)(const void *kernelName, intptr_t gridX,
     intptr_t gridY, intptr_t gridZ, intptr_t blockX, intptr_t blockY,
     intptr_t blockZ, int32_t smem, void **params, void **extra);
 
+void RTDEF(CUFLaunchClusterKernel)(const void *kernelName, intptr_t clusterX,
+    intptr_t clusterY, intptr_t clusterZ, intptr_t gridX, intptr_t gridY,
+    intptr_t gridZ, intptr_t blockX, intptr_t blockY, intptr_t blockZ,
+    int32_t smem, void **params, void **extra);
+
 } // extern "C"
 
 #endif // FORTRAN_RUNTIME_CUDA_KERNEL_H_
diff --git a/flang/include/flang/Runtime/CUDA/registration.h b/flang/include/flang/Runtime/CUDA/registration.h
index 009715613e29f72..5237069a4c739c5 100644
--- a/flang/include/flang/Runtime/CUDA/registration.h
+++ b/flang/include/flang/Runtime/CUDA/registration.h
@@ -11,6 +11,7 @@
 
 #include "flang/Runtime/entry-names.h"
 #include <cstddef>
+#include <cstdint>
 
 namespace Fortran::runtime::cuda {
 
@@ -23,6 +24,10 @@ void *RTDECL(CUFRegisterModule)(void *data);
 void RTDECL(CUFRegisterFunction)(
     void **module, const char *fctSym, char *fctName);
 
+/// Register a device variable.
+void RTDECL(CUFRegisterVariable)(
+    void **module, char *varSym, const char *varName, int64_t size);
+
 } // extern "C"
 
 } // namespace Fortran::runtime::cuda
diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h b/flang/include/flang/Semantics/openmp-directive-sets.h
index 8eb736bb098fe4e..55ef1e0ca61b9f6 100644
--- a/flang/include/flang/Semantics/openmp-directive-sets.h
+++ b/flang/include/flang/Semantics/openmp-directive-sets.h
@@ -210,7 +210,9 @@ static const OmpDirectiveSet blockConstructSet{
     Directive::OMPD_ordered,
     Directive::OMPD_parallel,
     Directive::OMPD_parallel_masked,
+    Directive::OMPD_parallel_master,
     Directive::OMPD_parallel_workshare,
+    Directive::OMPD_scope,
     Directive::OMPD_single,
     Directive::OMPD_target,
     Directive::OMPD_target_data,
@@ -281,6 +283,7 @@ static const OmpDirectiveSet workShareSet{
         Directive::OMPD_workshare,
         Directive::OMPD_parallel_workshare,
         Directive::OMPD_parallel_sections,
+        Directive::OMPD_scope,
         Directive::OMPD_sections,
         Directive::OMPD_single,
     } | allDoSet,
diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h
index 1626970600eff20..df4b21ada058fe3 100644
--- a/flang/include/flang/Tools/CrossToolHelpers.h
+++ b/flang/include/flang/Tools/CrossToolHelpers.h
@@ -122,7 +122,7 @@ struct MLIRToLLVMPassPipelineConfig : public FlangEPCallBacks {
   bool NoSignedZerosFPMath =
       false; ///< Set no-signed-zeros-fp-math attribute for functions.
   bool UnsafeFPMath = false; ///< Set unsafe-fp-math attribute for functions.
-  bool NSWOnLoopVarInc = true; ///< Add nsw flag to loop variable increments.
+  bool NSWOnLoopVarInc = false; ///< Add nsw flag to loop variable increments.
 };
 
 struct OffloadModuleOpts {
diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp
index d37430e0e5773e2..35c2ae3c73e69e8 100644
--- a/flang/lib/Frontend/CompilerInstance.cpp
+++ b/flang/lib/Frontend/CompilerInstance.cpp
@@ -313,7 +313,6 @@ bool CompilerInstance::setUpTargetMachine() {
         << error;
     return false;
   }
-
   // Create `TargetMachine`
   const auto &CGOpts = getInvocation().getCodeGenOpts();
   std::optional<llvm::CodeGenOptLevel> OptLevelOrNone =
@@ -322,9 +321,13 @@ bool CompilerInstance::setUpTargetMachine() {
   llvm::CodeGenOptLevel OptLevel = *OptLevelOrNone;
   std::string featuresStr = getTargetFeatures();
   std::optional<llvm::CodeModel::Model> cm = getCodeModel(CGOpts.CodeModel);
+
+  llvm::TargetOptions tOpts = llvm::TargetOptions();
+  tOpts.EnableAIXExtendedAltivecABI = targetOpts.EnableAIXExtendedAltivecABI;
+
   targetMachine.reset(theTarget->createTargetMachine(
       theTriple, /*CPU=*/targetOpts.cpu,
-      /*Features=*/featuresStr, llvm::TargetOptions(),
+      /*Features=*/featuresStr, /*Options=*/tOpts,
       /*Reloc::Model=*/CGOpts.getRelocationModel(),
       /*CodeModel::Model=*/cm, OptLevel));
   assert(targetMachine && "Failed to create TargetMachine");
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 5da5236af2b0e17..1214a2ea6bf1f3f 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -457,6 +457,16 @@ static void parseTargetArgs(TargetOptions &opts, llvm::opt::ArgList &args) {
 
   if (args.hasArg(clang::driver::options::OPT_fdisable_integer_16))
     opts.disabledIntegerKinds.push_back(16);
+
+  if (const llvm::opt::Arg *a =
+          args.getLastArg(clang::driver::options::OPT_mabi_EQ)) {
+    llvm::StringRef V = a->getValue();
+    if (V == "vec-extabi") {
+      opts.EnableAIXExtendedAltivecABI = true;
+    } else if (V == "vec-default") {
+      opts.EnableAIXExtendedAltivecABI = false;
+    }
+  }
 }
 // Tweak the frontend configuration based on the frontend action
 static void setUpFrontendBasedOnAction(FrontendOptions &opts) {
@@ -1350,6 +1360,12 @@ bool CompilerInvocation::createFromArgs(
     invoc.loweringOpts.setNoPPCNativeVecElemOrder(true);
   }
 
+  // -flang-experimental-integer-overflow
+  if (args.hasArg(
+          clang::driver::options::OPT_flang_experimental_integer_overflow)) {
+    invoc.loweringOpts.setNSWOnLoopVarInc(true);
+  }
+
   // Preserve all the remark options requested, i.e. -Rpass, -Rpass-missed or
   // -Rpass-analysis. This will be used later when processing and outputting the
   // remarks generated by LLVM in ExecuteCompilerInvocation.cpp.
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 3ea242315484fd7..f2e460fc53a67f4 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -828,8 +828,8 @@ void CodeGenAction::generateLLVMIR() {
     config.VScaleMax = vsr->second;
   }
 
-  if (ci.getInvocation().getLoweringOpts().getIntegerWrapAround())
-    config.NSWOnLoopVarInc = false;
+  if (ci.getInvocation().getLoweringOpts().getNSWOnLoopVarInc())
+    config.NSWOnLoopVarInc = true;
 
   // Create the pass pipeline
   fir::createMLIRToLLVMPassPipeline(pm, config, getCurrentFile());
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a3bd1ace11da213..877fe122265dd0d 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2271,7 +2271,7 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     assert(!incrementLoopNestInfo.empty() && "empty loop nest");
     mlir::Location loc = toLocation();
     mlir::arith::IntegerOverflowFlags flags{};
-    if (!getLoweringOptions().getIntegerWrapAround())
+    if (getLoweringOptions().getNSWOnLoopVarInc())
       flags = bitEnumSet(flags, mlir::arith::IntegerOverflowFlags::nsw);
     auto iofAttr = mlir::arith::IntegerOverflowFlagsAttr::get(
         builder->getContext(), flags);
diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp
index b534c81a605a905..1894b0cfd1bec29 100644
--- a/flang/lib/Lower/IO.cpp
+++ b/flang/lib/Lower/IO.cpp
@@ -929,7 +929,7 @@ static void genIoLoop(Fortran::lower::AbstractConverter &converter,
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   mlir::Location loc = converter.getCurrentLocation();
   mlir::arith::IntegerOverflowFlags flags{};
-  if (!converter.getLoweringOptions().getIntegerWrapAround())
+  if (converter.getLoweringOptions().getNSWOnLoopVarInc())
     flags = bitEnumSet(flags, mlir::arith::IntegerOverflowFlags::nsw);
   auto iofAttr =
       mlir::arith::IntegerOverflowFlagsAttr::get(builder.getContext(), flags);
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index fbc031f3a93d7d7..7c254ce673855ac 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -795,35 +795,43 @@ bool ClauseProcessor::processCopyprivate(
 bool ClauseProcessor::processDepend(mlir::omp::DependClauseOps &result) const {
   fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
 
-  return findRepeatableClause<omp::clause::Depend>(
-      [&](const omp::clause::Depend &clause, const parser::CharBlock &) {
-        using Depend = omp::clause::Depend;
-        assert(std::holds_alternative<Depend::WithLocators>(clause.u) &&
-               "Only the modern form is handled at the moment");
-        auto &modern = std::get<Depend::WithLocators>(clause.u);
-        auto kind = std::get<Depend::TaskDependenceType>(modern.t);
-        auto &objects = std::get<omp::ObjectList>(modern.t);
-
-        mlir::omp::ClauseTaskDependAttr dependTypeOperand =
-            genDependKindAttr(firOpBuilder, kind);
-        result.dependKinds.append(objects.size(), dependTypeOperand);
-
-        for (const omp::Object &object : objects) {
-          assert(object.ref() && "Expecting designator");
-
-          if (evaluate::ExtractSubstring(*object.ref())) {
-            TODO(converter.getCurrentLocation(),
-                 "substring not supported for task depend");
-          } else if (evaluate::IsArrayElement(*object.ref())) {
-            TODO(converter.getCurrentLocation(),
-                 "array sections not supported for task depend");
-          }
+  auto process = [&](const omp::clause::Depend &clause,
+                     const parser::CharBlock &) {
+    using Depend = omp::clause::Depend;
+    if (!std::holds_alternative<Depend::DepType>(clause.u)) {
+      TODO(converter.getCurrentLocation(),
+           "DEPEND clause with SINK or SOURCE is not supported yet");
+    }
+    auto &depType = std::get<Depend::DepType>(clause.u);
+    auto kind = std::get<Depend::TaskDependenceType>(depType.t);
+    auto &objects = std::get<omp::ObjectList>(depType.t);
 
-          semantics::Symbol *sym = object.sym();
-          const mlir::Value variable = converter.getSymbolAddress(*sym);
-          result.dependVars.push_back(variable);
-        }
-      });
+    if (std::get<std::optional<omp::clause::Iterator>>(depType.t)) {
+      TODO(converter.getCurrentLocation(),
+           "Support for iterator modifiers is not implemented yet");
+    }
+    mlir::omp::ClauseTaskDependAttr dependTypeOperand =
+        genDependKindAttr(firOpBuilder, kind);
+    result.dependKinds.append(objects.size(), dependTypeOperand);
+
+    for (const omp::Object &object : objects) {
+      assert(object.ref() && "Expecting designator");
+
+      if (evaluate::ExtractSubstring(*object.ref())) {
+        TODO(converter.getCurrentLocation(),
+             "substring not supported for task depend");
+      } else if (evaluate::IsArrayElement(*object.ref())) {
+        TODO(converter.getCurrentLocation(),
+             "array sections not supported for task depend");
+      }
+
+      semantics::Symbol *sym = object.sym();
+      const mlir::Value variable = converter.getSymbolAddress(*sym);
+      result.dependVars.push_back(variable);
+    }
+  };
+
+  return findRepeatableClause<omp::clause::Depend>(process);
 }
 
 bool ClauseProcessor::processHasDeviceAddr(
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index ee3d74a7c631af3..9483f643acd55a6 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -555,7 +555,7 @@ Depend make(const parser::OmpClause::Depend &inp,
   using Iteration = Doacross::Vector::value_type; // LoopIterationT
 
   CLAUSET_ENUM_CONVERT( //
-      convert1, parser::OmpDependenceType::Type, Depend::TaskDependenceType,
+      convert1, parser::OmpTaskDependenceType::Type, Depend::TaskDependenceType,
       // clang-format off
       MS(In,     In)
       MS(Out,    Out)
@@ -593,17 +593,18 @@ Depend make(const parser::OmpClause::Depend &inp,
             return Doacross{{/*DependenceType=*/Doacross::DependenceType::Sink,
                              /*Vector=*/makeList(s.v, convert2)}};
           },
-          // Depend::WithLocators
+          // Depend::DepType
           [&](const wrapped::InOut &s) -> Variant {
-            auto &t0 = std::get<parser::OmpDependenceType>(s.t);
-            auto &t1 = std::get<std::list<parser::Designator>>(s.t);
-            auto convert4 = [&](const parser::Designator &t) {
-              return makeObject(t, semaCtx);
-            };
-            return Depend::WithLocators{
-                {/*TaskDependenceType=*/convert1(t0.v),
-                 /*Iterator=*/std::nullopt,
-                 /*LocatorList=*/makeList(t1, convert4)}};
+            auto &t0 =
+                std::get<std::optional<parser::OmpIteratorModifier>>(s.t);
+            auto &t1 = std::get<parser::OmpTaskDependenceType>(s.t);
+            auto &t2 = std::get<parser::OmpObjectList>(s.t);
+
+            auto &&maybeIter = maybeApply(
+                [&](auto &&s) { return makeIterator(s, semaCtx); }, t0);
+            return Depend::DepType{{/*TaskDependenceType=*/convert1(t1.v),
+                                    /*Iterator=*/std::move(maybeIter),
+                                    /*LocatorList=*/makeObjects(t2, semaCtx)}};
           },
       },
       inp.v.u)};
@@ -721,10 +722,21 @@ From make(const parser::OmpClause::From &inp,
 // Full: empty
 
 Grainsize make(const parser::OmpClause::Grainsize &inp,
                semantics::SemanticsContext &semaCtx) {
-  // inp.v -> parser::ScalarIntExpr
-  return Grainsize{{/*Prescriptiveness=*/std::nullopt,
-                    /*GrainSize=*/makeExpr(inp.v, semaCtx)}};
+  // inp.v -> parser::OmpGrainsizeClause
+  using wrapped = parser::OmpGrainsizeClause;
+
+  CLAUSET_ENUM_CONVERT( //
+      convert, parser::OmpGrainsizeClause::Prescriptiveness,
+      Grainsize::Prescriptiveness,
+      // clang-format off
+      MS(Strict,   Strict)
+      // clang-format on
+  );
+  auto &t0 = std::get<std::optional<wrapped::Prescriptiveness>>(inp.v.t);
+  auto &t1 = std::get<parser::ScalarIntExpr>(inp.v.t);
+  return Grainsize{{/*Prescriptiveness=*/maybeApply(convert, t0),
+                    /*Grainsize=*/makeExpr(t1, semaCtx)}};
 }
 
 HasDeviceAddr make(const parser::OmpClause::HasDeviceAddr &inp,
@@ -971,9 +982,20 @@ Novariants make(const parser::OmpClause::Novariants &inp,
 
 NumTasks make(const parser::OmpClause::NumTasks &inp,
               semantics::SemanticsContext &semaCtx) {
-  // inp.v -> parser::ScalarIntExpr
-  return NumTasks{{/*Prescriptiveness=*/std::nullopt,
-                   /*NumTasks=*/makeExpr(inp.v, semaCtx)}};
+  // inp.v -> parser::OmpNumTasksClause
+  using wrapped = parser::OmpNumTasksClause;
+
+  CLAUSET_ENUM_CONVERT( //
+      convert, parser::OmpNumTasksClause::Prescriptiveness,
+      NumTasks::Prescriptiveness,
+      // clang-format off
+      MS(Strict,   Strict)
+      // clang-format on
+  );
+  auto &t0 = std::get<std::optional<wrapped::Prescriptiveness>>(inp.v.t);
+  auto &t1 = std::get<parser::ScalarIntExpr>(inp.v.t);
+  return NumTasks{{/*Prescriptiveness=*/maybeApply(convert, t0),
+                   /*NumTasks=*/makeExpr(t1, semaCtx)}};
 }
 
 NumTeams make(const parser::OmpClause::NumTeams &inp,
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index fc54da8babe63e9..876feca9b6f5be2 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -76,6 +76,18 @@ struct EntryBlockArgs {
            reduction.isValid() && taskReduction.isValid() &&
            useDeviceAddr.isValid() && useDevicePtr.isValid();
   }
+
+  auto getSyms() const {
+    return llvm::concat<const semantics::Symbol *const>(
+        inReduction.syms, map.syms, priv.syms, reduction.syms,
+        taskReduction.syms, useDeviceAddr.syms, useDevicePtr.syms);
+  }
+
+  auto getVars() const {
+    return llvm::concat<const mlir::Value>(
+        inReduction.vars, map.vars, priv.vars, reduction.vars,
+        taskReduction.vars, useDeviceAddr.vars, useDevicePtr.vars);
+  }
 };
 } // namespace
 
@@ -1506,8 +1518,7 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
     genEntryBlock(converter, args, op->getRegion(0));
     bindEntryBlockArgs(
         converter, llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op), args);
-    return llvm::to_vector(llvm::concat<const semantics::Symbol *const>(
-        args.priv.syms, args.reduction.syms));
+    return llvm::to_vector(args.getSyms());
   };
 
   assert((!enableDelayedPrivatization || dsp) &&
@@ -1581,11 +1592,11 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
   mlir::Operation *terminator =
       lower::genOpenMPTerminator(builder, sectionsOp, loc);
 
-  auto reductionCallback = [&](mlir::Operation *op) {
+  auto genRegionEntryCB = [&](mlir::Operation *op) {
     genEntryBlock(converter, args, op->getRegion(0));
     bindEntryBlockArgs(
         converter, llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(op), args);
-    return reductionSyms;
+    return llvm::to_vector(args.getSyms());
   };
 
   // Generate nested SECTION constructs.
@@ -1611,7 +1622,7 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
         OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval,
                           llvm::omp::Directive::OMPD_section)
             .setClauses(&sectionQueue.begin()->clauses)
-            .setGenRegionEntryCb(reductionCallback),
+            .setGenRegionEntryCb(genRegionEntryCB),
         sectionQueue, sectionQueue.begin());
   }
 
@@ -1650,6 +1661,15 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
   return sectionsOp;
 }
 
+static void genScopeOp(lower::AbstractConverter &converter,
+                       lower::SymMap &symTable,
+                       semantics::SemanticsContext &semaCtx,
+                       lower::pft::Evaluation &eval, mlir::Location loc,
+                       const ConstructQueue &queue,
+                       ConstructQueue::const_iterator item) {
+  TODO(loc, "Scope construct");
+}
+
 static mlir::omp::SingleOp
 genSingleOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
             semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval,
@@ -2478,6 +2498,9 @@ static void genOMPDispatch(lower::AbstractConverter &converter,
   case llvm::omp::Directive::OMPD_simd:
     genStandaloneSimd(converter, symTable, semaCtx, eval, loc, queue, item);
     break;
+  case llvm::omp::Directive::OMPD_scope:
+    genScopeOp(converter, symTable, semaCtx, eval, loc, queue, item);
+    break;
   case llvm::omp::Directive::OMPD_single:
     genSingleOp(converter, symTable, semaCtx, eval, loc, queue, item);
     break;
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 091b7c4c164e400..3c139f7e93405ca 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -35,11 +35,11 @@ void addCanonicalizerPassWithoutRegionSimplification(mlir::OpPassManager &pm) {
 void addCfgConversionPass(mlir::PassManager &pm,
                           const MLIRToLLVMPassPipelineConfig &config) {
   if (config.NSWOnLoopVarInc)
+    addNestedPassToAllTopLevelOperationsConditionally(
+        pm, disableCfgConversion, fir::createCFGConversionPassWithNSW);
+  else
     addNestedPassToAllTopLevelOperationsConditionally(pm, disableCfgConversion,
                                                       fir::createCFGConversion);
-  else
-    addNestedPassToAllTopLevelOperationsConditionally(
-        pm, disableCfgConversion, fir::createCFGConversionPassWithoutNSW);
 }
 
 void addAVC(mlir::PassManager &pm, const llvm::OptimizationLevel &optLevel) {
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index d20d3bc4108ce94..9eafa4ec234bddf 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -9,6 +9,7 @@ add_flang_library(FIRTransforms
   CompilerGeneratedNames.cpp
   ConstantArgumentGlobalisation.cpp
   ControlFlowConverter.cpp
+  CUFCommon.cpp
   CUFAddConstructor.cpp
   CUFDeviceGlobal.cpp
   CUFOpConversion.cpp
diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
index f260437e7104171..7cdb2f7ffe27d97 100644
--- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp
@@ -6,14 +6,23 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/Runtime/RTBuilder.h"
+#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/CodeGen/Target.h"
 #include "flang/Optimizer/Dialect/CUF/CUFOps.h"
 #include "flang/Optimizer/Dialect/FIRAttr.h"
 #include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
+#include "flang/Optimizer/Support/DataLayout.h"
+#include "flang/Optimizer/Transforms/CUFCommon.h"
+#include "flang/Runtime/CUDA/registration.h"
 #include "flang/Runtime/entry-names.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/Value.h"
 #include "mlir/Pass/Pass.h"
 #include "llvm/ADT/SmallVector.h"
 
@@ -22,9 +31,9 @@ namespace fir {
 #include "flang/Optimizer/Transforms/Passes.h.inc"
 } // namespace fir
 
-namespace {
+using namespace Fortran::runtime::cuda;
 
-static constexpr llvm::StringRef cudaModName{"cuda_device_mod"};
+namespace {
 
 static constexpr llvm::StringRef cudaFortranCtorName{
     "__cudaFortranConstructor"};
@@ -35,13 +44,23 @@ struct CUFAddConstructor
   void runOnOperation() override {
     mlir::ModuleOp mod = getOperation();
     mlir::SymbolTable symTab(mod);
-    mlir::OpBuilder builder{mod.getBodyRegion()};
+    mlir::OpBuilder opBuilder{mod.getBodyRegion()};
+    fir::FirOpBuilder builder(opBuilder, mod);
+    fir::KindMapping kindMap{fir::getKindMapping(mod)};
     builder.setInsertionPointToEnd(mod.getBody());
     mlir::Location loc = mod.getLoc();
     auto *ctx = mod.getContext();
     auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx);
+    auto idxTy = builder.getIndexType();
     auto funcTy =
         mlir::LLVM::LLVMFunctionType::get(voidTy, {}, /*isVarArg=*/false);
+    std::optional<mlir::DataLayout> dl =
+        fir::support::getOrSetDataLayout(mod, /*allowDefaultLayout=*/false);
+    if (!dl) {
+      mlir::emitError(mod.getLoc(), "data layout attribute is required to "
+                                    "perform " + getName() + " pass");
+      return signalPassFailure(); // *dl is dereferenced further down.
+    }
 
     // Symbol reference to CUFRegisterAllocator.
     builder.setInsertionPointToEnd(mod.getBody());
@@ -59,26 +78,70 @@ struct CUFAddConstructor
     builder.setInsertionPointToStart(func.addEntryBlock(builder));
     builder.create<mlir::LLVM::CallOp>(loc, funcTy, cufRegisterAllocatorRef);
 
-    // Register kernels
-    auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaModName);
+    auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName);
     if (gpuMod) {
       auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(ctx);
       auto registeredMod = builder.create<cuf::RegisterModuleOp>(
           loc, llvmPtrTy, mlir::SymbolRefAttr::get(ctx, gpuMod.getName()));
+
+      // Register kernels
       for (auto func : gpuMod.getOps<mlir::gpu::GPUFuncOp>()) {
         if (func.isKernel()) {
           auto kernelName = mlir::SymbolRefAttr::get(
-              builder.getStringAttr(cudaModName),
+              builder.getStringAttr(cudaDeviceModuleName),
               {mlir::SymbolRefAttr::get(builder.getContext(), func.getName())});
           builder.create<cuf::RegisterKernelOp>(loc, kernelName, registeredMod);
         }
       }
+
+      // Register variables
+      for (fir::GlobalOp globalOp : mod.getOps<fir::GlobalOp>()) {
+        auto attr = globalOp.getDataAttrAttr();
+        if (!attr)
+          continue;
+
+        mlir::func::FuncOp func;
+        switch (attr.getValue()) {
+        case cuf::DataAttribute::Device:
+        case cuf::DataAttribute::Constant: {
+          func = fir::runtime::getRuntimeFunc<mkRTKey(CUFRegisterVariable)>(
+              loc, builder);
+          auto fTy = func.getFunctionType();
+
+          // Global variable name
+          std::string gblNameStr = globalOp.getSymbol().getValue().str();
+          gblNameStr += '\0';
+          mlir::Value gblName = fir::getBase(
+              fir::factory::createStringLiteral(builder, loc, gblNameStr));
+
+          // Global variable size
+          auto sizeAndAlign = fir::getTypeSizeAndAlignmentOrCrash(
+              loc, globalOp.getType(), *dl, kindMap);
+          auto size =
+              builder.createIntegerConstant(loc, idxTy, sizeAndAlign.first);
+
+          // Global variable address
+          mlir::Value addr = builder.create<fir::AddrOfOp>(
+              loc, globalOp.resultType(), globalOp.getSymbol());
+
+          llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments(
+              builder, loc, fTy, registeredMod, addr, gblName, size)};
+          builder.create<fir::CallOp>(loc, func, args);
+        } break;
+        case cuf::DataAttribute::Managed:
+          TODO(loc, "registration of managed variables");
+        default:
+          break;
+        }
+        if (!func)
+          continue;
+      }
     }
     builder.create<mlir::LLVM::ReturnOp>(loc, mlir::ValueRange{});
 
     // Create the llvm.global_ctor with the function.
-    // TODO: We might want to have a utility that retrieve it if already created
-    // and adds new functions.
+    // TODO: We might want to have a utility that retrieves it if already
+    // created and adds new functions.
     builder.setInsertionPointToEnd(mod.getBody());
     llvm::SmallVector<mlir::Attribute> funcs;
     funcs.push_back(
diff --git a/flang/lib/Optimizer/Transforms/CUFCommon.cpp b/flang/lib/Optimizer/Transforms/CUFCommon.cpp
new file mode 100644
index 000000000000000..5eca86529f9e17c
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/CUFCommon.cpp
@@ -0,0 +1,31 @@
+//===-- CUFCommon.cpp - Shared functions between passes ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Transforms/CUFCommon.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
+
+/// Retrieve or create the CUDA Fortran GPU module in the given \p mod.
+mlir::gpu::GPUModuleOp cuf::getOrCreateGPUModule(mlir::ModuleOp mod,
+                                                 mlir::SymbolTable &symTab) {
+  if (auto gpuMod = symTab.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName))
+    return gpuMod;
+
+  auto *ctx = mod.getContext();
+  mod->setAttr(mlir::gpu::GPUDialect::getContainerModuleAttrName(),
+               mlir::UnitAttr::get(ctx));
+
+  mlir::OpBuilder builder(ctx);
+  auto gpuMod = builder.create<mlir::gpu::GPUModuleOp>(mod.getLoc(),
+                                                       cudaDeviceModuleName);
+  llvm::SmallVector<mlir::Attribute> targets;
+  targets.push_back(mlir::NVVM::NVVMTargetAttr::get(ctx));
+  gpuMod.setTargetsAttr(builder.getArrayAttr(targets));
+  mlir::Block::iterator insertPt(mod.getBodyRegion().front().end());
+  symTab.insert(gpuMod, insertPt);
+  return gpuMod;
+}
diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
index a4761f24f16d7be..dc39be8574f8448 100644
--- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
@@ -11,6 +11,7 @@
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Optimizer/Transforms/CUFCommon.h"
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/allocatable.h"
 #include "mlir/IR/SymbolTable.h"
@@ -58,6 +59,32 @@ class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase<CUFDeviceGlobal> {
       prepareImplicitDeviceGlobals(funcOp, symTable);
       return mlir::WalkResult::advance();
     });
+
+    // Copying the device global variable into the gpu module
+    mlir::SymbolTable parentSymTable(mod);
+    auto gpuMod =
+        parentSymTable.lookup<mlir::gpu::GPUModuleOp>(cudaDeviceModuleName);
+    if (gpuMod) {
+      mlir::SymbolTable gpuSymTable(gpuMod);
+      for (auto globalOp : mod.getOps<fir::GlobalOp>()) {
+        auto attr = globalOp.getDataAttrAttr();
+        if (!attr)
+          continue;
+        switch (attr.getValue()) {
+        case cuf::DataAttribute::Device:
+        case cuf::DataAttribute::Constant:
+        case cuf::DataAttribute::Managed: {
+          auto globalName{globalOp.getSymbol().getValue()};
+          if (gpuSymTable.lookup<fir::GlobalOp>(globalName)) {
+            break;
+          }
+          gpuSymTable.insert(globalOp->clone());
+        } break;
+        default:
+          break;
+        }
+      }
+    }
   }
 };
 } // namespace
diff --git a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp
index 5645ce6e6858c82..c64f35542a6e590 100644
--- a/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFGPUToLLVMConversion.cpp
@@ -76,11 +76,6 @@ struct GPULaunchKernelConversion
   mlir::LogicalResult
   matchAndRewrite(mlir::gpu::LaunchFuncOp op, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
-
-    if (op.hasClusterSize()) {
-      return mlir::failure();
-    }
-
     mlir::Location loc = op.getLoc();
     auto *ctx = rewriter.getContext();
     mlir::ModuleOp mod = op->getParentOfType<mlir::ModuleOp>();
@@ -107,37 +102,65 @@ struct GPULaunchKernelConversion
           rewriter.create<LLVM::AddressOfOp>(loc, ptrTy, kernel.getName());
     }
 
-    auto funcOp = mod.lookupSymbol<mlir::LLVM::LLVMFuncOp>(
-        RTNAME_STRING(CUFLaunchKernel));
-
     auto llvmIntPtrType = mlir::IntegerType::get(
         ctx, this->getTypeConverter()->getPointerBitwidth(0));
     auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx);
-    auto funcTy = mlir::LLVM::LLVMFunctionType::get(
-        voidTy,
-        {ptrTy, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType,
-         llvmIntPtrType, llvmIntPtrType, i32Ty, ptrTy, ptrTy},
-        /*isVarArg=*/false);
-
-    auto cufLaunchKernel = mlir::SymbolRefAttr::get(
-        mod.getContext(), RTNAME_STRING(CUFLaunchKernel));
-    if (!funcOp) {
-      mlir::OpBuilder::InsertionGuard insertGuard(rewriter);
-      rewriter.setInsertionPointToStart(mod.getBody());
-      auto launchKernelFuncOp = rewriter.create<mlir::LLVM::LLVMFuncOp>(
-          loc, RTNAME_STRING(CUFLaunchKernel), funcTy);
-      launchKernelFuncOp.setVisibility(mlir::SymbolTable::Visibility::Private);
-    }
 
     mlir::Value nullPtr = rewriter.create<LLVM::ZeroOp>(loc, ptrTy);
 
-    rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
-        op, funcTy, cufLaunchKernel,
-        mlir::ValueRange{kernelPtr, adaptor.getGridSizeX(),
-                         adaptor.getGridSizeY(), adaptor.getGridSizeZ(),
-                         adaptor.getBlockSizeX(), adaptor.getBlockSizeY(),
-                         adaptor.getBlockSizeZ(), dynamicMemorySize, kernelArgs,
-                         nullPtr});
+    if (op.hasClusterSize()) {
+      auto funcOp = mod.lookupSymbol<mlir::LLVM::LLVMFuncOp>(
+          RTNAME_STRING(CUFLaunchClusterKernel));
+      auto funcTy = mlir::LLVM::LLVMFunctionType::get(
+          voidTy,
+          {ptrTy, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType,
+           llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType,
+           llvmIntPtrType, llvmIntPtrType, i32Ty, ptrTy, ptrTy},
+          /*isVarArg=*/false);
+      auto cufLaunchClusterKernel = mlir::SymbolRefAttr::get(
+          mod.getContext(), RTNAME_STRING(CUFLaunchClusterKernel));
+      if (!funcOp) {
+        mlir::OpBuilder::InsertionGuard insertGuard(rewriter);
+        rewriter.setInsertionPointToStart(mod.getBody());
+        auto launchKernelFuncOp = rewriter.create<mlir::LLVM::LLVMFuncOp>(
+            loc, RTNAME_STRING(CUFLaunchClusterKernel), funcTy);
+        launchKernelFuncOp.setVisibility(
+            mlir::SymbolTable::Visibility::Private);
+      }
+      rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
+          op, funcTy, cufLaunchClusterKernel,
+          mlir::ValueRange{kernelPtr, adaptor.getClusterSizeX(),
+                           adaptor.getClusterSizeY(), adaptor.getClusterSizeZ(),
+                           adaptor.getGridSizeX(), adaptor.getGridSizeY(),
+                           adaptor.getGridSizeZ(), adaptor.getBlockSizeX(),
+                           adaptor.getBlockSizeY(), adaptor.getBlockSizeZ(),
+                           dynamicMemorySize, kernelArgs, nullPtr});
+    } else {
+      auto funcOp = mod.lookupSymbol<mlir::LLVM::LLVMFuncOp>(
+          RTNAME_STRING(CUFLaunchKernel));
+      auto funcTy = mlir::LLVM::LLVMFunctionType::get(
+          voidTy,
+          {ptrTy, llvmIntPtrType, llvmIntPtrType, llvmIntPtrType,
+           llvmIntPtrType, llvmIntPtrType, llvmIntPtrType, i32Ty, ptrTy, ptrTy},
+          /*isVarArg=*/false);
+      auto cufLaunchKernel = mlir::SymbolRefAttr::get(
+          mod.getContext(), RTNAME_STRING(CUFLaunchKernel));
+      if (!funcOp) {
+        mlir::OpBuilder::InsertionGuard insertGuard(rewriter);
+        rewriter.setInsertionPointToStart(mod.getBody());
+        auto launchKernelFuncOp = rewriter.create<mlir::LLVM::LLVMFuncOp>(
+            loc, RTNAME_STRING(CUFLaunchKernel), funcTy);
+        launchKernelFuncOp.setVisibility(
+            mlir::SymbolTable::Visibility::Private);
+      }
+      rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
+          op, funcTy, cufLaunchKernel,
+          mlir::ValueRange{kernelPtr, adaptor.getGridSizeX(),
+                           adaptor.getGridSizeY(), adaptor.getGridSizeZ(),
+                           adaptor.getBlockSizeX(), adaptor.getBlockSizeY(),
+                           adaptor.getBlockSizeZ(), dynamicMemorySize,
+                           kernelArgs, nullPtr});
+    }
 
     return mlir::success();
   }
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 9c2b882c7f46fef..f1f3a95b220df5f 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -15,6 +15,7 @@
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/Support/DataLayout.h"
+#include "flang/Optimizer/Transforms/CUFCommon.h"
 #include "flang/Runtime/CUDA/allocatable.h"
 #include "flang/Runtime/CUDA/common.h"
 #include "flang/Runtime/CUDA/descriptor.h"
@@ -111,7 +112,7 @@ mlir::Value getDeviceAddress(mlir::PatternRewriter &rewriter,
     switch (attr.getValue()) {
     case cuf::DataAttribute::Device:
     case cuf::DataAttribute::Managed:
-    case cuf::DataAttribute::Pinned:
+    case cuf::DataAttribute::Constant:
       isDevGlobal = true;
       break;
     default:
@@ -172,7 +173,7 @@ static mlir::LogicalResult convertOpToCall(OpTy op,
   return mlir::success();
 }
 
-struct CufAllocateOpConversion
+struct CUFAllocateOpConversion
     : public mlir::OpRewritePattern<cuf::AllocateOp> {
   using OpRewritePattern::OpRewritePattern;
 
@@ -215,7 +216,7 @@ struct CufAllocateOpConversion
   }
 };
 
-struct CufDeallocateOpConversion
+struct CUFDeallocateOpConversion
     : public mlir::OpRewritePattern<cuf::DeallocateOp> {
   using OpRewritePattern::OpRewritePattern;
 
@@ -283,10 +284,10 @@ static int computeWidth(mlir::Location loc, mlir::Type type,
   return width;
 }
 
-struct CufAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> {
+struct CUFAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> {
   using OpRewritePattern::OpRewritePattern;
 
-  CufAllocOpConversion(mlir::MLIRContext *context, mlir::DataLayout *dl,
+  CUFAllocOpConversion(mlir::MLIRContext *context, mlir::DataLayout *dl,
                        const fir::LLVMTypeConverter *typeConverter)
       : OpRewritePattern(context), dl{dl}, typeConverter{typeConverter} {}
 
@@ -379,7 +380,7 @@ struct CufAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> {
   const fir::LLVMTypeConverter *typeConverter;
 };
 
-struct CufFreeOpConversion : public mlir::OpRewritePattern<cuf::FreeOp> {
+struct CUFFreeOpConversion : public mlir::OpRewritePattern<cuf::FreeOp> {
   using OpRewritePattern::OpRewritePattern;
 
   mlir::LogicalResult
@@ -428,11 +429,11 @@ struct CufFreeOpConversion : public mlir::OpRewritePattern<cuf::FreeOp> {
   }
 };
 
-struct CufDataTransferOpConversion
+struct CUFDataTransferOpConversion
     : public mlir::OpRewritePattern<cuf::DataTransferOp> {
   using OpRewritePattern::OpRewritePattern;
 
-  CufDataTransferOpConversion(mlir::MLIRContext *context,
+  CUFDataTransferOpConversion(mlir::MLIRContext *context,
                               const mlir::SymbolTable &symtab)
       : OpRewritePattern(context), symtab{symtab} {}
 
@@ -620,6 +621,69 @@ struct CufDataTransferOpConversion
   const mlir::SymbolTable &symtab;
 };
 
+struct CUFLaunchOpConversion
+    : public mlir::OpRewritePattern<cuf::KernelLaunchOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  CUFLaunchOpConversion(mlir::MLIRContext *context,
+                        const mlir::SymbolTable &symTab)
+      : OpRewritePattern(context), symTab{symTab} {}
+
+  mlir::LogicalResult
+  matchAndRewrite(cuf::KernelLaunchOp op,
+                  mlir::PatternRewriter &rewriter) const override {
+    mlir::Location loc = op.getLoc();
+    auto idxTy = mlir::IndexType::get(op.getContext());
+    auto zero = rewriter.create<mlir::arith::ConstantOp>(
+        loc, rewriter.getIntegerType(32), rewriter.getI32IntegerAttr(0));
+    auto gridSizeX =
+        rewriter.create<mlir::arith::IndexCastOp>(loc, idxTy, op.getGridX());
+    auto gridSizeY =
+        rewriter.create<mlir::arith::IndexCastOp>(loc, idxTy, op.getGridY());
+    auto gridSizeZ =
+        rewriter.create<mlir::arith::IndexCastOp>(loc, idxTy, op.getGridZ());
+    auto blockSizeX =
+        rewriter.create<mlir::arith::IndexCastOp>(loc, idxTy, op.getBlockX());
+    auto blockSizeY =
+        rewriter.create<mlir::arith::IndexCastOp>(loc, idxTy, op.getBlockY());
+    auto blockSizeZ =
+        rewriter.create<mlir::arith::IndexCastOp>(loc, idxTy, op.getBlockZ());
+    auto kernelName = mlir::SymbolRefAttr::get(
+        rewriter.getStringAttr(cudaDeviceModuleName),
+        {mlir::SymbolRefAttr::get(
+            rewriter.getContext(),
+            op.getCallee().getLeafReference().getValue())});
+    mlir::Value clusterDimX, clusterDimY, clusterDimZ;
+    if (auto funcOp = symTab.lookup<mlir::func::FuncOp>(
+            op.getCallee().getLeafReference())) {
+      if (auto clusterDimsAttr = funcOp->getAttrOfType<cuf::ClusterDimsAttr>(
+              cuf::getClusterDimsAttrName())) {
+        clusterDimX = rewriter.create<mlir::arith::ConstantIndexOp>(
+            loc, clusterDimsAttr.getX().getInt());
+        clusterDimY = rewriter.create<mlir::arith::ConstantIndexOp>(
+            loc, clusterDimsAttr.getY().getInt());
+        clusterDimZ = rewriter.create<mlir::arith::ConstantIndexOp>(
+            loc, clusterDimsAttr.getZ().getInt());
+      }
+    }
+    auto gpuLaunchOp = rewriter.create<mlir::gpu::LaunchFuncOp>(
+        loc, kernelName, mlir::gpu::KernelDim3{gridSizeX, gridSizeY, gridSizeZ},
+        mlir::gpu::KernelDim3{blockSizeX, blockSizeY, blockSizeZ}, zero,
+        op.getArgs());
+    if (clusterDimX && clusterDimY && clusterDimZ) {
+      gpuLaunchOp.getClusterSizeXMutable().assign(clusterDimX);
+      gpuLaunchOp.getClusterSizeYMutable().assign(clusterDimY);
+      gpuLaunchOp.getClusterSizeZMutable().assign(clusterDimZ);
+    }
+    rewriter.replaceOp(op, gpuLaunchOp);
+    return mlir::success();
+  }
+
+private:
+  const mlir::SymbolTable &symTab;
+};
+
 class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> {
 public:
   void runOnOperation() override {
@@ -637,7 +701,8 @@ class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> {
         fir::support::getOrSetDataLayout(module, /*allowDefaultLayout=*/false);
     fir::LLVMTypeConverter typeConverter(module, /*applyTBAA=*/false,
                                          /*forceUnifiedTBAATree=*/false, *dl);
-    target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithDialect>();
+    target.addLegalDialect<fir::FIROpsDialect, mlir::arith::ArithDialect,
+                           mlir::gpu::GPUDialect>();
     cuf::populateCUFToFIRConversionPatterns(typeConverter, *dl, symtab,
                                             patterns);
     if (mlir::failed(mlir::applyPartialConversion(getOperation(), target,
@@ -653,8 +718,9 @@ class CUFOpConversion : public fir::impl::CUFOpConversionBase<CUFOpConversion> {
 void cuf::populateCUFToFIRConversionPatterns(
     const fir::LLVMTypeConverter &converter, mlir::DataLayout &dl,
     const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) {
-  patterns.insert<CufAllocOpConversion>(patterns.getContext(), &dl, &converter);
-  patterns.insert<CufAllocateOpConversion, CufDeallocateOpConversion,
-                  CufFreeOpConversion>(patterns.getContext());
-  patterns.insert<CufDataTransferOpConversion>(patterns.getContext(), symtab);
+  patterns.insert<CUFAllocOpConversion>(patterns.getContext(), &dl, &converter);
+  patterns.insert<CUFAllocateOpConversion, CUFDeallocateOpConversion,
+                  CUFFreeOpConversion>(patterns.getContext());
+  patterns.insert<CUFDataTransferOpConversion, CUFLaunchOpConversion>(
+      patterns.getContext(), symtab);
 }
diff --git a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
index 411bf7f364a6028..3b79d6d311b71ca 100644
--- a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
+++ b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp
@@ -332,6 +332,8 @@ class CfgConversion : public fir::impl::CFGConversionBase<CfgConversion> {
 public:
   using CFGConversionBase<CfgConversion>::CFGConversionBase;
 
+  CfgConversion(bool setNSW) { this->setNSW = setNSW; }
+
   void runOnOperation() override {
     auto *context = &this->getContext();
     mlir::RewritePatternSet patterns(context);
@@ -364,8 +366,6 @@ void fir::populateCfgConversionRewrites(mlir::RewritePatternSet &patterns,
       patterns.getContext(), forceLoopToExecuteOnce, setNSW);
 }
 
-std::unique_ptr<mlir::Pass> fir::createCFGConversionPassWithoutNSW() {
-  fir::CFGConversionOptions options;
-  options.setNSW = false;
-  return fir::createCFGConversion(options);
+std::unique_ptr<mlir::Pass> fir::createCFGConversionPassWithNSW() {
+  return std::make_unique<CfgConversion>(true);
 }
diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
index 1ab6c76dae8eda7..a070c87137fa16e 100644
--- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
+++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp
@@ -271,6 +271,19 @@ static bool canCacheThisType(mlir::LLVM::DICompositeTypeAttr comTy) {
   return true;
 }
 
+std::pair<std::uint64_t, unsigned short>
+DebugTypeGenerator::getFieldSizeAndAlign(mlir::Type fieldTy) {
+  mlir::Type llvmTy;
+  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(fieldTy))
+    llvmTy = llvmTypeConverter.convertBoxTypeAsStruct(boxTy, getBoxRank(boxTy));
+  else
+    llvmTy = llvmTypeConverter.convertType(fieldTy);
+
+  uint64_t byteSize = dataLayout->getTypeSize(llvmTy);
+  unsigned short byteAlign = dataLayout->getTypeABIAlignment(llvmTy);
+  return std::pair{byteSize, byteAlign};
+}
+
 mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType(
     fir::RecordType Ty, mlir::LLVM::DIFileAttr fileAttr,
     mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp) {
@@ -303,15 +316,7 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType(
   mlir::IntegerType intTy = mlir::IntegerType::get(context, 64);
   std::uint64_t offset = 0;
   for (auto [fieldName, fieldTy] : Ty.getTypeList()) {
-    mlir::Type llvmTy;
-    if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(fieldTy))
-      llvmTy =
-          llvmTypeConverter.convertBoxTypeAsStruct(boxTy, getBoxRank(boxTy));
-    else
-      llvmTy = llvmTypeConverter.convertType(fieldTy);
-
-    uint64_t byteSize = dataLayout->getTypeSize(llvmTy);
-    unsigned short byteAlign = dataLayout->getTypeABIAlignment(llvmTy);
+    auto [byteSize, byteAlign] = getFieldSizeAndAlign(fieldTy);
     std::optional<llvm::ArrayRef<int64_t>> lowerBounds =
         fir::getComponentLowerBoundsIfNonDefault(Ty, fieldName, module,
                                                  symbolTable);
@@ -368,6 +373,42 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType(
   return finalAttr;
 }
 
+mlir::LLVM::DITypeAttr DebugTypeGenerator::convertTupleType(
+    mlir::TupleType Ty, mlir::LLVM::DIFileAttr fileAttr,
+    mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp) { // declOp is not consulted below
+  // Build an unnamed DW_TAG_structure_type for the tuple, one unnamed member
+  auto iter = typeCache.find(Ty); // per element; reuse the cached attribute if present.
+  if (iter != typeCache.end())
+    return iter->second;
+
+  llvm::SmallVector<mlir::LLVM::DINodeAttr> elements; // member attributes, in tuple order
+  mlir::MLIRContext *context = module.getContext();
+
+  std::uint64_t offset = 0; // running byte offset within the struct
+  for (auto fieldTy : Ty.getTypes()) {
+    auto [byteSize, byteAlign] = getFieldSizeAndAlign(fieldTy);
+    mlir::LLVM::DITypeAttr elemTy =
+        convertType(fieldTy, fileAttr, scope, /*declOp=*/nullptr);
+    offset = llvm::alignTo(offset, byteAlign); // start the member at its ABI alignment
+    mlir::LLVM::DIDerivedTypeAttr tyAttr = mlir::LLVM::DIDerivedTypeAttr::get(
+        context, llvm::dwarf::DW_TAG_member, mlir::StringAttr::get(context, ""),
+        elemTy, byteSize * 8, byteAlign * 8, offset * 8, // byte quantities scaled to bits
+        /*optional<address space>=*/std::nullopt,
+        /*extra data=*/nullptr);
+    elements.push_back(tyAttr);
+    offset += llvm::alignTo(byteSize, byteAlign); // advance by the size rounded up to the alignment
+  }
+
+  auto typeAttr = mlir::LLVM::DICompositeTypeAttr::get(
+      context, llvm::dwarf::DW_TAG_structure_type,
+      mlir::StringAttr::get(context, ""), fileAttr, /*line=*/0, scope,
+      /*baseType=*/nullptr, mlir::LLVM::DIFlags::Zero, offset * 8,
+      /*alignInBits=*/0, elements, /*dataLocation=*/nullptr, /*rank=*/nullptr,
+      /*allocated=*/nullptr, /*associated=*/nullptr);
+  typeCache[Ty] = typeAttr; // remember the result for later lookups of the same tuple type
+  return typeAttr;
+}
+
 mlir::LLVM::DITypeAttr DebugTypeGenerator::convertSequenceType(
     fir::SequenceType seqTy, mlir::LLVM::DIFileAttr fileAttr,
     mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp) {
@@ -574,6 +615,8 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr,
                                 /*hasDescriptor=*/false);
   } else if (auto recTy = mlir::dyn_cast_or_null<fir::RecordType>(Ty)) {
     return convertRecordType(recTy, fileAttr, scope, declOp);
+  } else if (auto tupleTy = mlir::dyn_cast_if_present<mlir::TupleType>(Ty)) {
+    return convertTupleType(tupleTy, fileAttr, scope, declOp);
   } else if (auto refTy = mlir::dyn_cast_if_present<fir::ReferenceType>(Ty)) {
     auto elTy = refTy.getEleTy();
     return convertPointerLikeType(elTy, fileAttr, scope, declOp,
@@ -581,6 +624,10 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr,
                                   /*genAssociated=*/false);
   } else if (auto vecTy = mlir::dyn_cast_or_null<fir::VectorType>(Ty)) {
     return convertVectorType(vecTy, fileAttr, scope, declOp);
+  } else if (mlir::isa<mlir::IndexType>(Ty)) {
+    return genBasicType(context, mlir::StringAttr::get(context, "integer"),
+                        llvmTypeConverter.getIndexTypeBitwidth(),
+                        llvm::dwarf::DW_ATE_signed);
   } else if (auto boxTy = mlir::dyn_cast_or_null<fir::BoxType>(Ty)) {
     auto elTy = boxTy.getElementType();
     if (auto seqTy = mlir::dyn_cast_or_null<fir::SequenceType>(elTy))
diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h
index eeefb6c463d9366..c1fce4bdae5ce5e 100644
--- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h
+++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h
@@ -39,6 +39,10 @@ class DebugTypeGenerator {
                                            mlir::LLVM::DIFileAttr fileAttr,
                                            mlir::LLVM::DIScopeAttr scope,
                                            fir::cg::XDeclareOp declOp);
+  mlir::LLVM::DITypeAttr convertTupleType(mlir::TupleType Ty,
+                                          mlir::LLVM::DIFileAttr fileAttr,
+                                          mlir::LLVM::DIScopeAttr scope,
+                                          fir::cg::XDeclareOp declOp);
   mlir::LLVM::DITypeAttr convertSequenceType(fir::SequenceType seqTy,
                                              mlir::LLVM::DIFileAttr fileAttr,
                                              mlir::LLVM::DIScopeAttr scope,
@@ -73,6 +77,8 @@ class DebugTypeGenerator {
                              mlir::LLVM::DIFileAttr fileAttr,
                              mlir::LLVM::DIScopeAttr scope,
                              fir::cg::XDeclareOp declOp);
+  std::pair<std::uint64_t, unsigned short>
+  getFieldSizeAndAlign(mlir::Type fieldTy);
 
   mlir::ModuleOp module;
   mlir::SymbolTable *symbolTable;
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index 59a8757e58e8cc4..5276e1ec1dcadd7 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -365,10 +365,10 @@ TYPE_PARSER(construct<OmpDependSinkVecLength>(
 TYPE_PARSER(
     construct<OmpDependSinkVec>(name, maybe(Parser<OmpDependSinkVecLength>{})))
 
-TYPE_PARSER(
-    construct<OmpDependenceType>("IN"_id >> pure(OmpDependenceType::Type::In) ||
-        "INOUT" >> pure(OmpDependenceType::Type::Inout) ||
-        "OUT" >> pure(OmpDependenceType::Type::Out)))
+TYPE_PARSER(construct<OmpTaskDependenceType>(
+    "IN"_id >> pure(OmpTaskDependenceType::Type::In) ||
+    "INOUT" >> pure(OmpTaskDependenceType::Type::Inout) ||
+    "OUT" >> pure(OmpTaskDependenceType::Type::Out)))
 
 TYPE_CONTEXT_PARSER("Omp Depend clause"_en_US,
     construct<OmpDependClause>(construct<OmpDependClause::Sink>(
@@ -376,7 +376,8 @@ TYPE_CONTEXT_PARSER("Omp Depend clause"_en_US,
         construct<OmpDependClause>(
             construct<OmpDependClause::Source>("SOURCE"_tok)) ||
         construct<OmpDependClause>(construct<OmpDependClause::InOut>(
-            Parser<OmpDependenceType>{}, ":" >> nonemptyList(designator))))
+            maybe(Parser<OmpIteratorModifier>{} / ","_tok),
+            Parser<OmpTaskDependenceType>{} / ":", Parser<OmpObjectList>{})))
 
 // 2.15.3.7 LINEAR (linear-list: linear-step)
 //          linear-list -> list | modifier(list)
@@ -408,6 +409,16 @@ TYPE_PARSER(construct<OmpOrderClause>(
     maybe(Parser<OmpOrderModifier>{} / ":"),
     "CONCURRENT" >> pure(OmpOrderClause::Type::Concurrent)))
 
+// OMP 5.2 12.6.1 grainsize([ prescriptiveness :] scalar-integer-expression)
+TYPE_PARSER(construct<OmpGrainsizeClause>(
+    maybe("STRICT" >> pure(OmpGrainsizeClause::Prescriptiveness::Strict) / ":"),
+    scalarIntExpr)) // "STRICT" is the only prescriptiveness keyword matched
+
+// OMP 5.2 12.6.2 num_tasks([ prescriptiveness :] scalar-integer-expression)
+TYPE_PARSER(construct<OmpNumTasksClause>(
+    maybe("STRICT" >> pure(OmpNumTasksClause::Prescriptiveness::Strict) / ":"),
+    scalarIntExpr)) // "STRICT" is the only prescriptiveness keyword matched
+
 TYPE_PARSER(
     construct<OmpObject>(designator) || construct<OmpObject>("/" >> name / "/"))
 
@@ -464,7 +475,7 @@ TYPE_PARSER(
     "FROM" >> construct<OmpClause>(construct<OmpClause::From>(
                   parenthesized(Parser<OmpObjectList>{}))) ||
     "GRAINSIZE" >> construct<OmpClause>(construct<OmpClause::Grainsize>(
-                       parenthesized(scalarIntExpr))) ||
+                       parenthesized(Parser<OmpGrainsizeClause>{}))) ||
     "HAS_DEVICE_ADDR" >>
         construct<OmpClause>(construct<OmpClause::HasDeviceAddr>(
             parenthesized(Parser<OmpObjectList>{}))) ||
@@ -491,7 +502,7 @@ TYPE_PARSER(
         construct<OmpClause>(construct<OmpClause::Notinbranch>()) ||
     "NOWAIT" >> construct<OmpClause>(construct<OmpClause::Nowait>()) ||
     "NUM_TASKS" >> construct<OmpClause>(construct<OmpClause::NumTasks>(
-                       parenthesized(scalarIntExpr))) ||
+                       parenthesized(Parser<OmpNumTasksClause>{}))) ||
     "NUM_TEAMS" >> construct<OmpClause>(construct<OmpClause::NumTeams>(
                        parenthesized(scalarIntExpr))) ||
     "NUM_THREADS" >> construct<OmpClause>(construct<OmpClause::NumThreads>(
@@ -572,12 +583,19 @@ TYPE_PARSER(sourced(construct<OmpLoopDirective>(first(
     "MASKED TASKLOOP SIMD" >>
         pure(llvm::omp::Directive::OMPD_masked_taskloop_simd),
     "MASKED TASKLOOP" >> pure(llvm::omp::Directive::OMPD_masked_taskloop),
+    "MASTER TASKLOOP SIMD" >>
+        pure(llvm::omp::Directive::OMPD_master_taskloop_simd),
+    "MASTER TASKLOOP" >> pure(llvm::omp::Directive::OMPD_master_taskloop),
     "PARALLEL DO SIMD" >> pure(llvm::omp::Directive::OMPD_parallel_do_simd),
     "PARALLEL DO" >> pure(llvm::omp::Directive::OMPD_parallel_do),
     "PARALLEL MASKED TASKLOOP SIMD" >>
         pure(llvm::omp::Directive::OMPD_parallel_masked_taskloop_simd),
     "PARALLEL MASKED TASKLOOP" >>
         pure(llvm::omp::Directive::OMPD_parallel_masked_taskloop),
+    "PARALLEL MASTER TASKLOOP SIMD" >>
+        pure(llvm::omp::Directive::OMPD_parallel_master_taskloop_simd),
+    "PARALLEL MASTER TASKLOOP" >>
+        pure(llvm::omp::Directive::OMPD_parallel_master_taskloop),
     "SIMD" >> pure(llvm::omp::Directive::OMPD_simd),
     "TARGET LOOP" >> pure(llvm::omp::Directive::OMPD_target_loop),
     "TARGET PARALLEL DO SIMD" >>
@@ -695,8 +713,10 @@ TYPE_PARSER(construct<OmpBlockDirective>(first(
     "MASTER" >> pure(llvm::omp::Directive::OMPD_master),
     "ORDERED" >> pure(llvm::omp::Directive::OMPD_ordered),
     "PARALLEL MASKED" >> pure(llvm::omp::Directive::OMPD_parallel_masked),
+    "PARALLEL MASTER" >> pure(llvm::omp::Directive::OMPD_parallel_master),
     "PARALLEL WORKSHARE" >> pure(llvm::omp::Directive::OMPD_parallel_workshare),
     "PARALLEL" >> pure(llvm::omp::Directive::OMPD_parallel),
+    "SCOPE" >> pure(llvm::omp::Directive::OMPD_scope),
     "SINGLE" >> pure(llvm::omp::Directive::OMPD_single),
     "TARGET DATA" >> pure(llvm::omp::Directive::OMPD_target_data),
     "TARGET PARALLEL" >> pure(llvm::omp::Directive::OMPD_target_parallel),
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 04df988223e8f8d..e80ab0da1360eb5 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2196,6 +2196,16 @@ class UnparseVisitor {
     Walk(std::get<std::optional<OmpOrderModifier>>(x.t), ":");
     Walk(std::get<OmpOrderClause::Type>(x.t));
   }
+  void Unparse(const OmpGrainsizeClause &x) {
+    using Modifier = std::optional<OmpGrainsizeClause::Prescriptiveness>;
+    Walk(std::get<Modifier>(x.t), ":"); // optional modifier, walked with ":"
+    Walk(std::get<ScalarIntExpr>(x.t)); // the clause's integer expression
+  }
+  void Unparse(const OmpNumTasksClause &x) {
+    using Modifier = std::optional<OmpNumTasksClause::Prescriptiveness>;
+    Walk(std::get<Modifier>(x.t), ":"); // optional modifier, walked with ":"
+    Walk(std::get<ScalarIntExpr>(x.t)); // the clause's integer expression
+  }
   void Unparse(const OmpDependSinkVecLength &x) {
     Walk(std::get<DefinedOperator>(x.t));
     Walk(std::get<ScalarIntConstantExpr>(x.t));
@@ -2206,9 +2216,9 @@ class UnparseVisitor {
   }
   void Unparse(const OmpDependClause::InOut &x) {
     Put("(");
-    Walk(std::get<OmpDependenceType>(x.t));
+    Walk(std::get<OmpTaskDependenceType>(x.t));
     Put(":");
-    Walk(std::get<std::list<Designator>>(x.t), ",");
+    Walk(std::get<OmpObjectList>(x.t));
     Put(")");
   }
   bool Pre(const OmpDependClause &x) {
@@ -2264,6 +2274,12 @@ class UnparseVisitor {
     case llvm::omp::Directive::OMPD_masked_taskloop:
       Word("MASKED TASKLOOP");
       break;
+    case llvm::omp::Directive::OMPD_master_taskloop_simd:
+      Word("MASTER TASKLOOP SIMD");
+      break;
+    case llvm::omp::Directive::OMPD_master_taskloop:
+      Word("MASTER TASKLOOP");
+      break;
     case llvm::omp::Directive::OMPD_parallel_do:
       Word("PARALLEL DO ");
       break;
@@ -2276,6 +2292,12 @@ class UnparseVisitor {
     case llvm::omp::Directive::OMPD_parallel_masked_taskloop:
       Word("PARALLEL MASKED TASKLOOP");
       break;
+    case llvm::omp::Directive::OMPD_parallel_master_taskloop_simd:
+      Word("PARALLEL MASTER TASKLOOP SIMD");
+      break;
+    case llvm::omp::Directive::OMPD_parallel_master_taskloop:
+      Word("PARALLEL MASTER TASKLOOP");
+      break;
     case llvm::omp::Directive::OMPD_simd:
       Word("SIMD ");
       break;
@@ -2380,12 +2402,18 @@ class UnparseVisitor {
     case llvm::omp::Directive::OMPD_parallel_masked:
       Word("PARALLEL MASKED");
       break;
+    case llvm::omp::Directive::OMPD_parallel_master:
+      Word("PARALLEL MASTER");
+      break;
     case llvm::omp::Directive::OMPD_parallel_workshare:
       Word("PARALLEL WORKSHARE ");
       break;
     case llvm::omp::Directive::OMPD_parallel:
       Word("PARALLEL ");
       break;
+    case llvm::omp::Directive::OMPD_scope:
+      Word("SCOPE ");
+      break;
     case llvm::omp::Directive::OMPD_single:
       Word("SINGLE ");
       break;
@@ -2816,7 +2844,7 @@ class UnparseVisitor {
       OmpLastprivateClause, LastprivateModifier) // OMP lastprivate-modifier
   WALK_NESTED_ENUM(OmpScheduleModifierType, ModType) // OMP schedule-modifier
   WALK_NESTED_ENUM(OmpLinearModifier, Type) // OMP linear-modifier
-  WALK_NESTED_ENUM(OmpDependenceType, Type) // OMP dependence-type
+  WALK_NESTED_ENUM(OmpTaskDependenceType, Type) // OMP task-dependence-type
   WALK_NESTED_ENUM(OmpScheduleClause, ScheduleType) // OMP schedule-type
   WALK_NESTED_ENUM(OmpDeviceClause, DeviceModifier) // OMP device modifier
   WALK_NESTED_ENUM(OmpDeviceTypeClause, Type) // OMP DEVICE_TYPE
@@ -2826,6 +2854,9 @@ class UnparseVisitor {
   WALK_NESTED_ENUM(OmpCancelType, Type) // OMP cancel-type
   WALK_NESTED_ENUM(OmpOrderClause, Type) // OMP order-type
   WALK_NESTED_ENUM(OmpOrderModifier, Kind) // OMP order-modifier
+  WALK_NESTED_ENUM(
+      OmpGrainsizeClause, Prescriptiveness) // OMP grainsize-modifier
+  WALK_NESTED_ENUM(OmpNumTasksClause, Prescriptiveness) // OMP numtasks-modifier
   WALK_NESTED_ENUM(OmpMapClause, Type) // OMP map-type
   WALK_NESTED_ENUM(OmpMapClause, TypeModifier) // OMP map-type-modifier
 #undef WALK_NESTED_ENUM
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index 46486907ceb9e1f..8f3eb9fefee6784 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -38,6 +38,16 @@ namespace Fortran::semantics {
     CheckAllowedClause(llvm::omp::Y); \
   }
 
+std::string ThisVersion(unsigned version) {
+  // The number is split as major * 10 + minor: 52 becomes "OpenMP v5.2".
+  std::string major{std::to_string(version / 10)};
+  return "OpenMP v" + major + "." + std::to_string(version % 10);
+}
+
+std::string TryVersion(unsigned version) {
+  return std::string{"try -fopenmp-version="}.append(std::to_string(version));
+}
+
 // 'OmpWorkshareBlockChecker' is used to check the validity of the assignment
 // statements and the expressions enclosed in an OpenMP Workshare construct
 class OmpWorkshareBlockChecker {
@@ -200,14 +210,10 @@ bool OmpStructureChecker::CheckAllowedClause(llvmOmpClause clause) {
       auto clauseName{parser::ToUpperCaseLetters(getClauseName(clause).str())};
       auto dirName{parser::ToUpperCaseLetters(getDirectiveName(dir).str())};
 
-      std::string thisVersion{
-          std::to_string(version / 10) + "." + std::to_string(version % 10)};
-      std::string goodVersion{std::to_string(allowedInVersion)};
-
       context_.Say(dirCtx.clauseSource,
-          "%s clause is not allowed on directive %s in OpenMP v%s, "
-          "try -fopenmp-version=%d"_err_en_US,
-          clauseName, dirName, thisVersion, allowedInVersion);
+          "%s clause is not allowed on directive %s in %s, %s"_err_en_US,
+          clauseName, dirName, ThisVersion(version),
+          TryVersion(allowedInVersion));
     }
   }
   return CheckAllowed(clause);
@@ -972,6 +978,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) {
     HasInvalidWorksharingNesting(
         beginDir.source, llvm::omp::nestedWorkshareErrSet);
     break;
+  case llvm::omp::Directive::OMPD_scope:
   case llvm::omp::Directive::OMPD_single:
     // TODO: This check needs to be extended while implementing nesting of
     // regions checks.
@@ -1864,6 +1871,9 @@ void OmpStructureChecker::Enter(const parser::OmpEndBlockDirective &x) {
   const auto &dir{std::get<parser::OmpBlockDirective>(x.t)};
   ResetPartialContext(dir.source);
   switch (dir.v) {
+  case llvm::omp::Directive::OMPD_scope:
+    PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_end_scope);
+    break;
   // 2.7.3 end-single-clause -> copyprivate-clause |
   //                            nowait-clause
   case llvm::omp::Directive::OMPD_single:
@@ -1886,7 +1896,8 @@ void OmpStructureChecker::Enter(const parser::OmpEndBlockDirective &x) {
 // end_workshareare popped as they are pushed while entering the
 // EndBlockDirective.
 void OmpStructureChecker::Leave(const parser::OmpEndBlockDirective &x) {
-  if ((GetContext().directive == llvm::omp::Directive::OMPD_end_single) ||
+  if ((GetContext().directive == llvm::omp::Directive::OMPD_end_scope) ||
+      (GetContext().directive == llvm::omp::Directive::OMPD_end_single) ||
       (GetContext().directive == llvm::omp::Directive::OMPD_end_workshare)) {
     dirContext_.pop_back();
   }
@@ -2474,12 +2485,14 @@ CHECK_SIMPLE_CLAUSE(Final, OMPC_final)
 CHECK_SIMPLE_CLAUSE(Flush, OMPC_flush)
 CHECK_SIMPLE_CLAUSE(From, OMPC_from)
 CHECK_SIMPLE_CLAUSE(Full, OMPC_full)
+CHECK_SIMPLE_CLAUSE(Grainsize, OMPC_grainsize)
 CHECK_SIMPLE_CLAUSE(Hint, OMPC_hint)
 CHECK_SIMPLE_CLAUSE(Holds, OMPC_holds)
 CHECK_SIMPLE_CLAUSE(InReduction, OMPC_in_reduction)
 CHECK_SIMPLE_CLAUSE(Inclusive, OMPC_inclusive)
 CHECK_SIMPLE_CLAUSE(Match, OMPC_match)
 CHECK_SIMPLE_CLAUSE(Nontemporal, OMPC_nontemporal)
+CHECK_SIMPLE_CLAUSE(NumTasks, OMPC_num_tasks)
 CHECK_SIMPLE_CLAUSE(Order, OMPC_order)
 CHECK_SIMPLE_CLAUSE(Read, OMPC_read)
 CHECK_SIMPLE_CLAUSE(Threadprivate, OMPC_threadprivate)
@@ -2530,8 +2543,6 @@ CHECK_SIMPLE_CLAUSE(OmpxBare, OMPC_ompx_bare)
 CHECK_SIMPLE_CLAUSE(Fail, OMPC_fail)
 CHECK_SIMPLE_CLAUSE(Weak, OMPC_weak)
 
-CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize)
-CHECK_REQ_SCALAR_INT_CLAUSE(NumTasks, OMPC_num_tasks)
 CHECK_REQ_SCALAR_INT_CLAUSE(NumTeams, OMPC_num_teams)
 CHECK_REQ_SCALAR_INT_CLAUSE(NumThreads, OMPC_num_threads)
 CHECK_REQ_SCALAR_INT_CLAUSE(OmpxDynCgroupMem, OMPC_ompx_dyn_cgroup_mem)
@@ -3283,18 +3294,33 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Depend &x) {
         parser::ToUpperCaseLetters(getDirectiveName(GetContext().directive)));
   }
   if (const auto *inOut{std::get_if<parser::OmpDependClause::InOut>(&x.v.u)}) {
-    const auto &designators{std::get<std::list<parser::Designator>>(inOut->t)};
-    for (const auto &ele : designators) {
-      if (const auto *dataRef{std::get_if<parser::DataRef>(&ele.u)}) {
-        CheckDependList(*dataRef);
-        if (const auto *arr{
-                std::get_if<common::Indirection<parser::ArrayElement>>(
-                    &dataRef->u)}) {
-          CheckArraySection(arr->value(), GetLastName(*dataRef),
-              llvm::omp::Clause::OMPC_depend);
+    for (const auto &object : std::get<parser::OmpObjectList>(inOut->t).v) {
+      if (const auto *name{std::get_if<parser::Name>(&object.u)}) {
+        context_.Say(GetContext().clauseSource,
+            "Common block name ('%s') cannot appear in a DEPEND "
+            "clause"_err_en_US,
+            name->ToString());
+      } else if (auto *designator{std::get_if<parser::Designator>(&object.u)}) {
+        if (auto *dataRef{std::get_if<parser::DataRef>(&designator->u)}) {
+          CheckDependList(*dataRef);
+          if (const auto *arr{
+                  std::get_if<common::Indirection<parser::ArrayElement>>(
+                      &dataRef->u)}) {
+            CheckArraySection(arr->value(), GetLastName(*dataRef),
+                llvm::omp::Clause::OMPC_depend);
+          }
         }
       }
     }
+    if (std::get<std::optional<parser::OmpIteratorModifier>>(inOut->t)) {
+      unsigned version{context_.langOptions().OpenMPVersion};
+      unsigned allowedInVersion{50};
+      if (version < allowedInVersion) {
+        context_.Say(GetContext().clauseSource,
+            "Iterator modifiers are not supported in %s, %s"_warn_en_US,
+            ThisVersion(version), TryVersion(allowedInVersion));
+      }
+    }
   }
 }
 
@@ -3367,8 +3393,8 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Lastprivate &x) {
           std::to_string(version / 10) + "." + std::to_string(version % 10)};
       context_.Say(GetContext().clauseSource,
           "LASTPRIVATE clause with CONDITIONAL modifier is not "
-          "allowed in OpenMP v%s, try -fopenmp-version=%d"_err_en_US,
-          thisVersion, allowedInVersion);
+          "allowed in %s, %s"_err_en_US,
+          ThisVersion(version), TryVersion(allowedInVersion));
     }
   }
 }
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 33936ba4c2b34f1..5e3ad5f3b4773db 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -435,6 +435,20 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor<llvm::omp::Directive> {
   bool Pre(const parser::OpenMPAllocatorsConstruct &);
   void Post(const parser::OpenMPAllocatorsConstruct &);
 
+  void Post(const parser::OmpObjectList &x) {
+    // Objects in OMP clauses should already be resolved by this point, with
+    // the exception of common blocks: the ResolveNamesVisitor does not visit
+    // parser::Name (those are dealt with as members of other structures).
+    // Resolve every name in x that still has no symbol as a common block.
+    for (const parser::OmpObject &object : x.v) {
+      const auto *blockName{std::get_if<parser::Name>(&object.u)};
+      if (!blockName || blockName->symbol) {
+        continue;
+      }
+      Resolve(*blockName, currScope().MakeCommonBlock(blockName->source));
+    }
+  }
+
   // 2.15.3 Data-Sharing Attribute Clauses
   void Post(const parser::OmpDefaultClause &);
   bool Pre(const parser::OmpClause::Shared &x) {
@@ -531,16 +545,9 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor<llvm::omp::Directive> {
     return false;
   }
 
-  bool Pre(const parser::OmpDependClause &x) {
-    if (const auto *dependSink{
-            std::get_if<parser::OmpDependClause::Sink>(&x.u)}) {
-      const auto &dependSinkVec{dependSink->v};
-      for (const auto &dependSinkElement : dependSinkVec) {
-        const auto &name{std::get<parser::Name>(dependSinkElement.t)};
-        ResolveName(&name);
-      }
-    }
-    return false;
+  void Post(const parser::OmpDependSinkVec &x) {
+    // Resolve the Name member of the sink-vector element.
+    ResolveName(&std::get<parser::Name>(x.t));
   }
 
   bool Pre(const parser::OmpClause::UseDevicePtr &x) {
@@ -1524,8 +1531,10 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) {
   case llvm::omp::Directive::OMPD_masked:
   case llvm::omp::Directive::OMPD_parallel_masked:
   case llvm::omp::Directive::OMPD_master:
+  case llvm::omp::Directive::OMPD_parallel_master:
   case llvm::omp::Directive::OMPD_ordered:
   case llvm::omp::Directive::OMPD_parallel:
+  case llvm::omp::Directive::OMPD_scope:
   case llvm::omp::Directive::OMPD_single:
   case llvm::omp::Directive::OMPD_target:
   case llvm::omp::Directive::OMPD_target_data:
@@ -1542,7 +1551,8 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) {
     // TODO others
     break;
   }
-  if (beginDir.v == llvm::omp::Directive::OMPD_master)
+  if (beginDir.v == llvm::omp::Directive::OMPD_master ||
+      beginDir.v == llvm::omp::Directive::OMPD_parallel_master)
     IssueNonConformanceWarning(beginDir.v, beginDir.source);
   ClearDataSharingAttributeObjects();
   ClearPrivateDataSharingAttributeObjects();
@@ -1555,8 +1565,11 @@ void OmpAttributeVisitor::Post(const parser::OpenMPBlockConstruct &x) {
   const auto &beginDir{std::get<parser::OmpBlockDirective>(beginBlockDir.t)};
   switch (beginDir.v) {
   case llvm::omp::Directive::OMPD_masked:
+  case llvm::omp::Directive::OMPD_master:
   case llvm::omp::Directive::OMPD_parallel_masked:
+  case llvm::omp::Directive::OMPD_parallel_master:
   case llvm::omp::Directive::OMPD_parallel:
+  case llvm::omp::Directive::OMPD_scope:
   case llvm::omp::Directive::OMPD_single:
   case llvm::omp::Directive::OMPD_target:
   case llvm::omp::Directive::OMPD_task:
@@ -1625,10 +1638,14 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
   case llvm::omp::Directive::OMPD_loop:
   case llvm::omp::Directive::OMPD_masked_taskloop_simd:
   case llvm::omp::Directive::OMPD_masked_taskloop:
+  case llvm::omp::Directive::OMPD_master_taskloop_simd:
+  case llvm::omp::Directive::OMPD_master_taskloop:
   case llvm::omp::Directive::OMPD_parallel_do:
   case llvm::omp::Directive::OMPD_parallel_do_simd:
   case llvm::omp::Directive::OMPD_parallel_masked_taskloop_simd:
   case llvm::omp::Directive::OMPD_parallel_masked_taskloop:
+  case llvm::omp::Directive::OMPD_parallel_master_taskloop_simd:
+  case llvm::omp::Directive::OMPD_parallel_master_taskloop:
   case llvm::omp::Directive::OMPD_simd:
   case llvm::omp::Directive::OMPD_target_loop:
   case llvm::omp::Directive::OMPD_target_parallel_do:
@@ -1653,7 +1670,11 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) {
   default:
     break;
   }
-  if (beginDir.v == llvm::omp::Directive::OMPD_target_loop)
+  if (beginDir.v == llvm::omp::OMPD_master_taskloop ||
+      beginDir.v == llvm::omp::OMPD_master_taskloop_simd ||
+      beginDir.v == llvm::omp::OMPD_parallel_master_taskloop ||
+      beginDir.v == llvm::omp::OMPD_parallel_master_taskloop_simd ||
+      beginDir.v == llvm::omp::Directive::OMPD_target_loop)
     IssueNonConformanceWarning(beginDir.v, beginDir.source);
   ClearDataSharingAttributeObjects();
   SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList));
@@ -2050,6 +2071,8 @@ void OmpAttributeVisitor::Post(const parser::OpenMPAllocatorsConstruct &x) {
 static bool IsPrivatizable(const Symbol *sym) {
   auto *misc{sym->detailsIf<MiscDetails>()};
   return !IsProcedure(*sym) && !IsNamedConstant(*sym) &&
+      !semantics::IsAssumedSizeArray(
+          *sym) && /* OpenMP 5.2, 5.1.1: Assumed-size arrays are shared*/
       !sym->owner().IsDerivedType() &&
       sym->owner().kind() != Scope::Kind::ImpliedDos &&
       !sym->detailsIf<semantics::AssocEntityDetails>() &&
@@ -2880,18 +2903,39 @@ void OmpAttributeVisitor::AddOmpRequiresToScope(Scope &scope,
 
 void OmpAttributeVisitor::IssueNonConformanceWarning(
     llvm::omp::Directive D, parser::CharBlock source) {
-  std::string warnStr = "";
-  std::string dirName = llvm::omp::getOpenMPDirectiveName(D).str();
+  std::string warnStr; // backing storage written through warnStrOS
+  llvm::raw_string_ostream warnStrOS(warnStr);
+  warnStrOS << "OpenMP directive "
+            << parser::ToUpperCaseLetters(
+                   llvm::omp::getOpenMPDirectiveName(D).str())
+            << " has been deprecated";
+
+  auto setAlternativeStr = [&warnStrOS](llvm::StringRef alt) { // appends the suggested replacement
+    warnStrOS << ", please use " << alt << " instead.";
+  };
   switch (D) {
   case llvm::omp::OMPD_master:
-    warnStr = "OpenMP directive '" + dirName +
-        "' has been deprecated, please use 'masked' instead.";
+    setAlternativeStr("MASKED");
+    break;
+  case llvm::omp::OMPD_master_taskloop:
+    setAlternativeStr("MASKED TASKLOOP");
+    break;
+  case llvm::omp::OMPD_master_taskloop_simd:
+    setAlternativeStr("MASKED TASKLOOP SIMD");
+    break;
+  case llvm::omp::OMPD_parallel_master:
+    setAlternativeStr("PARALLEL MASKED");
+    break;
+  case llvm::omp::OMPD_parallel_master_taskloop:
+    setAlternativeStr("PARALLEL MASKED TASKLOOP");
+    break;
+  case llvm::omp::OMPD_parallel_master_taskloop_simd:
+    setAlternativeStr("PARALLEL MASKED TASKLOOP SIMD");
     break;
   case llvm::omp::OMPD_target_loop:
-  default:
-    warnStr = "OpenMP directive '" + dirName + "' has been deprecated.";
+  default:; // no replacement is suggested for these directives
   }
-  context_.Warn(
-      common::UsageWarning::OpenMPUsage, source, "%s"_warn_en_US, warnStr);
+  context_.Warn(common::UsageWarning::OpenMPUsage, source, "%s"_warn_en_US,
+      warnStrOS.str());
 }
 } // namespace Fortran::semantics
diff --git a/flang/runtime/CUDA/kernel.cpp b/flang/runtime/CUDA/kernel.cpp
index f81153a1af4bc77..abb7ebb72e59231 100644
--- a/flang/runtime/CUDA/kernel.cpp
+++ b/flang/runtime/CUDA/kernel.cpp
@@ -25,9 +25,32 @@ void RTDEF(CUFLaunchKernel)(const void *kernel, intptr_t gridX, intptr_t gridY,
   blockDim.x = blockX;
   blockDim.y = blockY;
   blockDim.z = blockZ;
-  cudaStream_t stream = 0;
+  cudaStream_t stream = 0; // TODO stream management
   CUDA_REPORT_IF_ERROR(
       cudaLaunchKernel(kernel, gridDim, blockDim, params, smem, stream));
 }
 
+void RTDEF(CUFLaunchClusterKernel)(const void *kernel, intptr_t clusterX,
+    intptr_t clusterY, intptr_t clusterZ, intptr_t gridX, intptr_t gridY,
+    intptr_t gridZ, intptr_t blockX, intptr_t blockY, intptr_t blockZ,
+    int32_t smem, void **params, void **extra) { // 'extra' is not used in this body
+  cudaLaunchConfig_t config; // every field used for the launch is assigned below
+  config.gridDim.x = gridX;
+  config.gridDim.y = gridY;
+  config.gridDim.z = gridZ;
+  config.blockDim.x = blockX;
+  config.blockDim.y = blockY;
+  config.blockDim.z = blockZ;
+  config.dynamicSmemBytes = smem;
+  config.stream = 0; // TODO stream management
+  cudaLaunchAttribute launchAttr[1]; // single attribute: the cluster dimensions
+  launchAttr[0].id = cudaLaunchAttributeClusterDimension;
+  launchAttr[0].val.clusterDim.x = clusterX;
+  launchAttr[0].val.clusterDim.y = clusterY;
+  launchAttr[0].val.clusterDim.z = clusterZ;
+  config.numAttrs = 1;
+  config.attrs = launchAttr;
+  CUDA_REPORT_IF_ERROR(cudaLaunchKernelExC(&config, kernel, params)); // launch with the config built above
+}
+
 } // extern "C"
diff --git a/flang/runtime/CUDA/registration.cpp b/flang/runtime/CUDA/registration.cpp
index 20d274c4d8d1c2d..b7b6ef389bffba9 100644
--- a/flang/runtime/CUDA/registration.cpp
+++ b/flang/runtime/CUDA/registration.cpp
@@ -21,6 +21,9 @@ extern void __cudaRegisterFatBinaryEnd(void *);
 extern void __cudaRegisterFunction(void **fatCubinHandle, const char *hostFun,
     char *deviceFun, const char *deviceName, int thread_limit, uint3 *tid,
     uint3 *bid, dim3 *bDim, dim3 *gDim, int *wSize);
+extern void __cudaRegisterVar(void **fatCubinHandle, char *hostVar,
+    const char *deviceAddress, const char *deviceName, int ext, size_t size,
+    int constant, int global);
 
 void *RTDECL(CUFRegisterModule)(void *data) {
   void **fatHandle{__cudaRegisterFatBinary(data)};
@@ -34,6 +37,11 @@ void RTDEF(CUFRegisterFunction)(
       (uint3 *)0, (dim3 *)0, (dim3 *)0, (int *)0);
 }
 
+void RTDEF(CUFRegisterVariable)(
+    void **module, char *varSym, const char *varName, int64_t size) { // forwards to __cudaRegisterVar
+  __cudaRegisterVar(module, varSym, varName, varName, 0, size, 0, 0); // varName serves as deviceAddress and deviceName; ext, constant, global are 0
+}
+
 } // extern "C"
 
 } // namespace Fortran::runtime::cuda
diff --git a/flang/test/Driver/frontend-forwarding.f90 b/flang/test/Driver/frontend-forwarding.f90
index 55a74ccf40467b2..ff2d66095214648 100644
--- a/flang/test/Driver/frontend-forwarding.f90
+++ b/flang/test/Driver/frontend-forwarding.f90
@@ -20,6 +20,7 @@
 ! RUN:     -fversion-loops-for-stride \
 ! RUN:     -flang-experimental-hlfir \
 ! RUN:     -flang-deprecated-no-hlfir \
+! RUN:     -flang-experimental-integer-overflow \
 ! RUN:     -fno-ppc-native-vector-element-order \
 ! RUN:     -fppc-native-vector-element-order \
 ! RUN:     -mllvm -print-before-all \
@@ -51,6 +52,7 @@
 ! CHECK: "-fversion-loops-for-stride"
 ! CHECK: "-flang-experimental-hlfir"
 ! CHECK: "-flang-deprecated-no-hlfir"
+! CHECK: "-flang-experimental-integer-overflow"
 ! CHECK: "-fno-ppc-native-vector-element-order"
 ! CHECK: "-fppc-native-vector-element-order"
 ! CHECK: "-Rpass"
diff --git a/flang/test/Driver/mabi.f90 b/flang/test/Driver/mabi.f90
new file mode 100644
index 000000000000000..88fd4d2a993fb04
--- /dev/null
+++ b/flang/test/Driver/mabi.f90
@@ -0,0 +1,17 @@
+! RUN: not %flang -### -c --target=powerpc64le-unknown-linux -mabi=vec-extabi %s 2>&1 | FileCheck --check-prefix=INVALID1 %s
+! RUN: not %flang -### -c --target=x86_64-unknown-linux -mabi=vec-extabi %s 2>&1 | FileCheck --check-prefix=INVALID2 %s
+! RUN: not %flang -### -c --target=powerpc-unknown-aix -mabi=abc %s 2>&1 | FileCheck --check-prefix=INVALID3 %s
+! RUN: %flang -### -c -target powerpc-unknown-aix %s 2>&1 | FileCheck --implicit-check-not=vec-extabi %s
+! RUN: %flang -### -c -target powerpc-unknown-aix -mabi=vec-default %s 2>&1 | FileCheck --implicit-check-not=vec-extabi %s
+! RUN: %flang -### -c -target powerpc-unknown-aix -mabi=vec-extabi %s 2>&1 | FileCheck --check-prefix=EXTABI %s
+
+! REQUIRES: target=powerpc{{.*}}
+
+! INVALID1: error: unsupported option '-mabi=vec-extabi' for target '{{.*}}'
+! INVALID2: error: unsupported option '-mabi=' for target '{{.*}}'
+! INVALID3: error: unsupported argument 'abc' to option '-mabi='
+
+! EXTABI: "-fc1"
+! EXTABI-SAME: "-mabi=vec-extabi"
+
+
diff --git a/flang/test/Fir/CUDA/cuda-constructor-2.f90 b/flang/test/Fir/CUDA/cuda-constructor-2.f90
new file mode 100644
index 000000000000000..378dabbb7c7e7d9
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-constructor-2.f90
@@ -0,0 +1,22 @@
+// RUN: fir-opt --split-input-file --cuf-add-constructor %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git cae351f3453a0a26ec8eb2ddaf773c24a29d929e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+
+  fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
+
+  gpu.module @cuda_device_mod [#nvvm.target] {
+  }
+}
+
+// CHECK: gpu.module @cuda_device_mod [#nvvm.target] 
+
+// CHECK: llvm.func internal @__cudaFortranConstructor() {
+// CHECK-DAG: %[[MODULE:.*]] = cuf.register_module @cuda_device_mod -> !llvm.ptr
+// CHECK-DAG: %[[VAR_NAME:.*]] = fir.address_of(@_QQ{{.*}}) : !fir.ref<!fir.char<1,12>>
+// CHECK-DAG: %[[VAR_ADDR:.*]] = fir.address_of(@_QMmtestsEn) : !fir.ref<!fir.array<5xi32>>
+// CHECK-DAG: %[[MODULE2:.*]] = fir.convert %[[MODULE]] : (!llvm.ptr) -> !fir.ref<!fir.llvm_ptr<i8>>
+// CHECK-DAG: %[[VAR_ADDR2:.*]] = fir.convert %[[VAR_ADDR]] : (!fir.ref<!fir.array<5xi32>>) -> !fir.ref<i8>
+// CHECK-DAG: %[[VAR_NAME2:.*]] = fir.convert %[[VAR_NAME]] : (!fir.ref<!fir.char<1,12>>) -> !fir.ref<i8>
+// CHECK-DAG: %[[CST:.*]] = arith.constant 20 : index
+// CHECK-DAG: %[[CST2:.*]] = fir.convert %[[CST]] : (index) -> i64
+// CHECK: fir.call @_FortranACUFRegisterVariable(%[[MODULE2]], %[[VAR_ADDR2]], %[[VAR_NAME2]], %[[CST2]]) : (!fir.ref<!fir.llvm_ptr<i8>>, !fir.ref<i8>, !fir.ref<i8>, i64) -> none
diff --git a/flang/test/Fir/CUDA/cuda-device-global.f90 b/flang/test/Fir/CUDA/cuda-device-global.f90
new file mode 100644
index 000000000000000..c83a938d5af2141
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-device-global.f90
@@ -0,0 +1,13 @@
+
+// RUN: fir-opt --split-input-file --cuf-device-global %s | FileCheck %s
+
+
+module attributes {fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module} {
+  fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
+
+  gpu.module @cuda_device_mod [#nvvm.target] {
+  }
+}
+
+// CHECK: gpu.module @cuda_device_mod [#nvvm.target] 
+// CHECK-NEXT: fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda<device>} : !fir.array<5xi32>
diff --git a/flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir b/flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir
index f10bd82f978dc4d..7fede7c6c17b780 100644
--- a/flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir
+++ b/flang/test/Fir/CUDA/cuda-gpu-launch-func.mlir
@@ -1,4 +1,4 @@
-// RUN: fir-opt --cuf-gpu-convert-to-llvm %s | FileCheck %s
+// RUN: fir-opt --split-input-file --cuf-gpu-convert-to-llvm %s | FileCheck %s
 
 module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (git@github.com:clementval/llvm-project.git ddcfd4d2dc17bf66cee8c3ef6284118684a2b0e6)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
   llvm.func @_QMmod1Phost_sub() {
@@ -102,3 +102,25 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : ve
 
 // CHECK: %[[KERNEL_PTR:.*]] = llvm.mlir.addressof @_QMmod1Psub1 : !llvm.ptr
 // CHECK: llvm.call @_FortranACUFLaunchKernel(%[[KERNEL_PTR]], {{.*}})
+
+// -----
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (git@github.com:clementval/llvm-project.git 4116c1370ff76adf1e58eb3c39d0a14721794c70)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+  llvm.func @_FortranACUFLaunchClusterKernel(!llvm.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, !llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"}
+  llvm.func @_QMmod1Psub1() attributes {cuf.cluster_dims = #cuf.cluster_dims<x = 2 : i64, y = 2 : i64, z = 1 : i64>} {
+    llvm.return
+  }
+  llvm.func @_QQmain() attributes {fir.bindc_name = "test"} {
+    %0 = llvm.mlir.constant(1 : index) : i64
+    %1 = llvm.mlir.constant(2 : index) : i64
+    %2 = llvm.mlir.constant(0 : i32) : i32
+    %3 = llvm.mlir.constant(10 : index) : i64
+    gpu.launch_func  @cuda_device_mod::@_QMmod1Psub1 clusters in (%1, %1, %0) blocks in (%3, %3, %0) threads in (%3, %3, %0) : i64 dynamic_shared_memory_size %2
+    llvm.return
+  }
+  gpu.binary @cuda_device_mod  [#gpu.object<#nvvm.target, "">]
+}
+
+// CHECK-LABEL: llvm.func @_QQmain()
+// CHECK: %[[KERNEL_PTR:.*]] = llvm.mlir.addressof @_QMmod1Psub1
+// CHECK: llvm.call @_FortranACUFLaunchClusterKernel(%[[KERNEL_PTR]], {{.*}})
diff --git a/flang/test/Fir/CUDA/cuda-launch.fir b/flang/test/Fir/CUDA/cuda-launch.fir
new file mode 100644
index 000000000000000..f11bcbdb7fce55b
--- /dev/null
+++ b/flang/test/Fir/CUDA/cuda-launch.fir
@@ -0,0 +1,64 @@
+// RUN: fir-opt --split-input-file --cuf-convert %s | FileCheck %s
+
+
+module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} {
+  gpu.module @cuda_device_mod {
+    gpu.func @_QPsub_device1() kernel {
+      cf.br ^bb1
+    ^bb1:  // pred: ^bb0
+      gpu.return
+    }
+    gpu.func @_QPsub_device2(%arg0: !fir.ref<f32>) kernel {
+      cf.br ^bb1(%arg0 : !fir.ref<f32>)
+    ^bb1(%0: !fir.ref<f32>):  // pred: ^bb0
+      %1 = fir.declare %0 {uniq_name = "_QFsub1Ei"} : (!fir.ref<f32>) -> !fir.ref<f32>
+      %cst = arith.constant 2.000000e+00 : f32
+      fir.store %cst to %1 : !fir.ref<f32>
+      gpu.return
+    }
+  }
+
+  func.func @_QQmain() attributes {fir.bindc_name = "main"} {
+    %0 = fir.alloca f32
+    // CHECK: %[[ALLOCA:.*]] = fir.alloca f32
+    %c1 = arith.constant 1 : index
+    %c11_i32 = arith.constant 11 : i32
+    %c6_i32 = arith.constant 6 : i32
+    %c1_i32 = arith.constant 1 : i32
+    // CHECK: gpu.launch_func  @cuda_device_mod::@_QPsub_device1 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}})  dynamic_shared_memory_size %c0{{.*}}
+    cuf.kernel_launch @cuda_device_mod::@_QPsub_device1<<<%c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32>>>()
+
+    // CHECK: gpu.launch_func  @cuda_device_mod::@_QPsub_device2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}})  dynamic_shared_memory_size %c0{{.*}} args(%[[ALLOCA]] : !fir.ref<f32>)
+    cuf.kernel_launch @cuda_device_mod::@_QPsub_device2<<<%c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32>>>(%0) : (!fir.ref<f32>)
+    return
+  }
+
+}
+
+// -----
+
+module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} {
+  gpu.module @cuda_device_mod {
+    gpu.func @_QMmod1Psub1(%arg0: !fir.ref<!fir.array<10xi32>>) kernel {
+      gpu.return
+    }
+  }
+
+  func.func @_QMmod1Psub1(%arg0: !fir.ref<!fir.array<10xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "adev"}) attributes {cuf.cluster_dims = #cuf.cluster_dims<x = 2 : i64, y = 2 : i64, z = 1 : i64>, cuf.proc_attr = #cuf.cuda_proc<global>} {
+    return
+  }
+  func.func @_QMmod1Phost_sub() {
+    %c10 = arith.constant 10 : index
+    %0 = cuf.alloc !fir.array<10xi32> {bindc_name = "adev", data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Fhost_subEadev"} -> !fir.ref<!fir.array<10xi32>>
+    %1 = fir.shape %c10 : (index) -> !fir.shape<1>
+    %2:2 = hlfir.declare %0(%1) {data_attr = #cuf.cuda<device>, uniq_name = "_QMmod1Fhost_subEadev"} : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<10xi32>>, !fir.ref<!fir.array<10xi32>>)
+    %c1_i32 = arith.constant 1 : i32
+    %c10_i32 = arith.constant 10 : i32
+    cuf.kernel_launch @_QMmod1Psub1<<<%c1_i32, %c1_i32, %c1_i32, %c10_i32, %c1_i32, %c1_i32>>>(%2#1) : (!fir.ref<!fir.array<10xi32>>)
+    return
+  }
+}
+
+// CHECK-LABEL: func.func @_QMmod1Phost_sub()
+// CHECK: gpu.launch_func  @cuda_device_mod::@_QMmod1Psub1 clusters in (%c2{{.*}}, %c2{{.*}}, %c1{{.*}})
+
diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
index 00f8e6e6cc9a6b2..335877e7c9a8725 100644
--- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
+++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir
@@ -358,10 +358,10 @@ func.func @_QPopenmp_target_data_region() {
       %9 = arith.subi %8, %c1_i64 : i64
       %10 = fir.coordinate_of %0, %9 : (!fir.ref<!fir.array<1024xi32>>, i64) -> !fir.ref<i32>
       fir.store %6 to %10 : !fir.ref<i32>
-      %11 = arith.addi %arg0, %c1 overflow<nsw> : index
+      %11 = arith.addi %arg0, %c1 : index
       %12 = fir.convert %c1 : (index) -> i32
       %13 = fir.load %1 : !fir.ref<i32>
-      %14 = arith.addi %13, %12 overflow<nsw> : i32
+      %14 = arith.addi %13, %12 : i32
       fir.result %11, %14 : index, i32
     }
     fir.store %5#1 to %1 : !fir.ref<i32>
@@ -404,11 +404,11 @@ func.func @_QPopenmp_target_data_region() {
 // CHECK:             %[[VAL_21:.*]] = llvm.sub %[[VAL_19]], %[[VAL_20]]  : i64
 // CHECK:             %[[VAL_22:.*]] = llvm.getelementptr %[[VAL_1]][0, %[[VAL_21]]] : (!llvm.ptr, i64) -> !llvm.ptr
 // CHECK:             llvm.store %[[VAL_17]], %[[VAL_22]] : i32, !llvm.ptr
-// CHECK:             %[[VAL_23:.*]] = llvm.add %[[VAL_12]], %[[VAL_8]] overflow<nsw> : i64
+// CHECK:             %[[VAL_23:.*]] = llvm.add %[[VAL_12]], %[[VAL_8]]  : i64
 // CHECK:             %[[VAL_24:.*]] = llvm.trunc %[[VAL_8]] : i64 to i32
 // CHECK:             %[[VAL_25:.*]] = llvm.load %[[VAL_3]] : !llvm.ptr -> i32
-// CHECK:             %[[VAL_26:.*]] = llvm.add %[[VAL_25]], %[[VAL_24]] overflow<nsw> : i32
-// CHECK:             %[[VAL_27:.*]] = llvm.add %[[VAL_12]], %[[VAL_8]] overflow<nsw> : i64
+// CHECK:             %[[VAL_26:.*]] = llvm.add %[[VAL_25]], %[[VAL_24]]  : i32
+// CHECK:             %[[VAL_27:.*]] = llvm.add %[[VAL_12]], %[[VAL_8]]  : i64
 // CHECK:             %[[VAL_28:.*]] = llvm.mlir.constant(1 : index) : i64
 // CHECK:             %[[VAL_29:.*]] = llvm.sub %[[VAL_14]], %[[VAL_28]]  : i64
 // CHECK:             llvm.br ^bb1(%[[VAL_27]], %[[VAL_26]], %[[VAL_29]] : i64, i32, i64)
diff --git a/flang/test/Fir/loop01.fir b/flang/test/Fir/loop01.fir
index 30d10b9bbdb9792..c1cbb522c378c0e 100644
--- a/flang/test/Fir/loop01.fir
+++ b/flang/test/Fir/loop01.fir
@@ -1,7 +1,5 @@
 // RUN: fir-opt --split-input-file --cfg-conversion %s | FileCheck %s
-// RUN: fir-opt --split-input-file --cfg-conversion="set-nsw=false" %s | FileCheck %s --check-prefix=NO-NSW
-
-// NO-NSW-NOT: overflow<nsw>
+// RUN: fir-opt --split-input-file --cfg-conversion="set-nsw=true" %s | FileCheck %s --check-prefix=NSW
 
 func.func @x(%lb : index, %ub : index, %step : index, %b : i1, %addr : !fir.ref<index>) {
   fir.do_loop %iv = %lb to %ub step %step unordered {
@@ -37,7 +35,7 @@ func.func private @f2() -> i1
 // CHECK:       fir.store %[[VAL_12]] to %[[VAL_4]] : !fir.ref<index>
 // CHECK:       br ^bb5
 // CHECK:     ^bb5:
-// CHECK:       %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] overflow<nsw> : index
+// CHECK:       %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] : index
 // CHECK:       %[[VAL_14:.*]] = arith.constant 1 : index
 // CHECK:       %[[VAL_15:.*]] = arith.subi %[[VAL_9]], %[[VAL_14]] : index
 // CHECK:       br ^bb1(%[[VAL_13]], %[[VAL_15]] : index, index)
@@ -46,6 +44,34 @@ func.func private @f2() -> i1
 // CHECK:     }
 // CHECK:     func private @f2() -> i1
 
+// NSW:     func @x(%[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: index, %[[VAL_3:.*]]: i1, %[[VAL_4:.*]]: !fir.ref<index>) {
+// NSW:       %[[VAL_5:.*]] = arith.subi %[[VAL_1]], %[[VAL_0]] : index
+// NSW:       %[[VAL_6:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] : index
+// NSW:       %[[VAL_7:.*]] = arith.divsi %[[VAL_6]], %[[VAL_2]] : index
+// NSW:       br ^bb1(%[[VAL_0]], %[[VAL_7]] : index, index)
+// NSW:     ^bb1(%[[VAL_8:.*]]: index, %[[VAL_9:.*]]: index):
+// NSW:       %[[VAL_10:.*]] = arith.constant 0 : index
+// NSW:       %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_10]] : index
+// NSW:       cond_br %[[VAL_11]], ^bb2, ^bb6
+// NSW:     ^bb2:
+// NSW:       cond_br %[[VAL_3]], ^bb3, ^bb4
+// NSW:     ^bb3:
+// NSW:       fir.store %[[VAL_8]] to %[[VAL_4]] : !fir.ref<index>
+// NSW:       br ^bb5
+// NSW:     ^bb4:
+// NSW:       %[[VAL_12:.*]] = arith.constant 0 : index
+// NSW:       fir.store %[[VAL_12]] to %[[VAL_4]] : !fir.ref<index>
+// NSW:       br ^bb5
+// NSW:     ^bb5:
+// NSW:       %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] overflow<nsw> : index
+// NSW:       %[[VAL_14:.*]] = arith.constant 1 : index
+// NSW:       %[[VAL_15:.*]] = arith.subi %[[VAL_9]], %[[VAL_14]] : index
+// NSW:       br ^bb1(%[[VAL_13]], %[[VAL_15]] : index, index)
+// NSW:     ^bb6:
+// NSW:       return
+// NSW:     }
+// NSW:     func private @f2() -> i1
+
 // -----
 
 func.func @x2(%lo : index, %up : index, %ok : i1) {
@@ -75,13 +101,36 @@ func.func private @f3(i16)
 // CHECK:     cond_br %[[VAL_14]], ^bb2, ^bb3
 // CHECK:   ^bb2:
 // CHECK:     %[[VAL_15:.*]] = fir.call @f2() : () -> i1
-// CHECK:     %[[VAL_16:.*]] = arith.addi %[[VAL_4]], %[[VAL_3]] overflow<nsw> : index
+// CHECK:     %[[VAL_16:.*]] = arith.addi %[[VAL_4]], %[[VAL_3]] : index
 // CHECK:     br ^bb1(%[[VAL_16]], %[[VAL_15]] : index, i1)
 // CHECK:   ^bb3:
 // CHECK:     return
 // CHECK:   }
 // CHECK:   func private @f3(i16)
 
+// NSW:   func @x2(%[[VAL_0:.*]]: index, %[[VAL_1:.*]]: index, %[[VAL_2:.*]]: i1) {
+// NSW:     %[[VAL_3:.*]] = arith.constant 1 : index
+// NSW:     br ^bb1(%[[VAL_0]], %[[VAL_2]] : index, i1)
+// NSW:   ^bb1(%[[VAL_4:.*]]: index, %[[VAL_5:.*]]: i1):
+// NSW:     %[[VAL_6:.*]] = arith.constant 0 : index
+// NSW:     %[[VAL_7:.*]] = arith.cmpi slt, %[[VAL_6]], %[[VAL_3]] : index
+// NSW:     %[[VAL_8:.*]] = arith.cmpi sle, %[[VAL_4]], %[[VAL_1]] : index
+// NSW:     %[[VAL_9:.*]] = arith.cmpi slt, %[[VAL_3]], %[[VAL_6]] : index
+// NSW:     %[[VAL_10:.*]] = arith.cmpi sle, %[[VAL_1]], %[[VAL_4]] : index
+// NSW:     %[[VAL_11:.*]] = arith.andi %[[VAL_7]], %[[VAL_8]] : i1
+// NSW:     %[[VAL_12:.*]] = arith.andi %[[VAL_9]], %[[VAL_10]] : i1
+// NSW:     %[[VAL_13:.*]] = arith.ori %[[VAL_11]], %[[VAL_12]] : i1
+// NSW:     %[[VAL_14:.*]] = arith.andi %[[VAL_5]], %[[VAL_13]] : i1
+// NSW:     cond_br %[[VAL_14]], ^bb2, ^bb3
+// NSW:   ^bb2:
+// NSW:     %[[VAL_15:.*]] = fir.call @f2() : () -> i1
+// NSW:     %[[VAL_16:.*]] = arith.addi %[[VAL_4]], %[[VAL_3]] overflow<nsw> : index
+// NSW:     br ^bb1(%[[VAL_16]], %[[VAL_15]] : index, i1)
+// NSW:   ^bb3:
+// NSW:     return
+// NSW:   }
+// NSW:   func private @f3(i16)
+
 // -----
 
 // do_loop with an extra loop-carried value
@@ -110,7 +159,7 @@ func.func @x3(%lo : index, %up : index) -> i1 {
 // CHECK:           cond_br %[[VAL_11]], ^bb2, ^bb3
 // CHECK:         ^bb2:
 // CHECK:           %[[VAL_12:.*]] = fir.call @f2() : () -> i1
-// CHECK:           %[[VAL_13:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] overflow<nsw> : index
+// CHECK:           %[[VAL_13:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] : index
 // CHECK:           %[[VAL_14:.*]] = arith.constant 1 : index
 // CHECK:           %[[VAL_15:.*]] = arith.subi %[[VAL_9]], %[[VAL_14]] : index
 // CHECK:           br ^bb1(%[[VAL_13]], %[[VAL_12]], %[[VAL_15]] : index, i1, index)
@@ -118,6 +167,29 @@ func.func @x3(%lo : index, %up : index) -> i1 {
 // CHECK:           return %[[VAL_8]] : i1
 // CHECK:         }
 
+// NSW-LABEL:   func @x3(
+// NSW-SAME:             %[[VAL_0:.*]]: index,
+// NSW-SAME:             %[[VAL_1:.*]]: index) -> i1 {
+// NSW:           %[[VAL_2:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_3:.*]] = arith.constant true
+// NSW:           %[[VAL_4:.*]] = arith.subi %[[VAL_1]], %[[VAL_0]] : index
+// NSW:           %[[VAL_5:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] : index
+// NSW:           %[[VAL_6:.*]] = arith.divsi %[[VAL_5]], %[[VAL_2]] : index
+// NSW:           br ^bb1(%[[VAL_0]], %[[VAL_3]], %[[VAL_6]] : index, i1, index)
+// NSW:         ^bb1(%[[VAL_7:.*]]: index, %[[VAL_8:.*]]: i1, %[[VAL_9:.*]]: index):
+// NSW:           %[[VAL_10:.*]] = arith.constant 0 : index
+// NSW:           %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_10]] : index
+// NSW:           cond_br %[[VAL_11]], ^bb2, ^bb3
+// NSW:         ^bb2:
+// NSW:           %[[VAL_12:.*]] = fir.call @f2() : () -> i1
+// NSW:           %[[VAL_13:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] overflow<nsw> : index
+// NSW:           %[[VAL_14:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_15:.*]] = arith.subi %[[VAL_9]], %[[VAL_14]] : index
+// NSW:           br ^bb1(%[[VAL_13]], %[[VAL_12]], %[[VAL_15]] : index, i1, index)
+// NSW:         ^bb3:
+// NSW:           return %[[VAL_8]] : i1
+// NSW:         }
+
 // -----
 
 // iterate_while with an extra loop-carried value
@@ -155,7 +227,7 @@ func.func private @f4(i32) -> i1
 // CHECK:           cond_br %[[VAL_16]], ^bb2, ^bb3
 // CHECK:         ^bb2:
 // CHECK:           %[[VAL_17:.*]] = fir.call @f2() : () -> i1
-// CHECK:           %[[VAL_18:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] overflow<nsw> : index
+// CHECK:           %[[VAL_18:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] : index
 // CHECK:           br ^bb1(%[[VAL_18]], %[[VAL_6]], %[[VAL_17]] : index, i1, i1)
 // CHECK:         ^bb3:
 // CHECK:           %[[VAL_19:.*]] = arith.andi %[[VAL_6]], %[[VAL_7]] : i1
@@ -163,6 +235,34 @@ func.func private @f4(i32) -> i1
 // CHECK:         }
 // CHECK:         func private @f4(i32) -> i1
 
+// NSW-LABEL:   func @y3(
+// NSW-SAME:             %[[VAL_0:.*]]: index,
+// NSW-SAME:             %[[VAL_1:.*]]: index) -> i1 {
+// NSW:           %[[VAL_2:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_3:.*]] = arith.constant true
+// NSW:           %[[VAL_4:.*]] = fir.call @f2() : () -> i1
+// NSW:           br ^bb1(%[[VAL_0]], %[[VAL_3]], %[[VAL_4]] : index, i1, i1)
+// NSW:         ^bb1(%[[VAL_5:.*]]: index, %[[VAL_6:.*]]: i1, %[[VAL_7:.*]]: i1):
+// NSW:           %[[VAL_8:.*]] = arith.constant 0 : index
+// NSW:           %[[VAL_9:.*]] = arith.cmpi slt, %[[VAL_8]], %[[VAL_2]] : index
+// NSW:           %[[VAL_10:.*]] = arith.cmpi sle, %[[VAL_5]], %[[VAL_1]] : index
+// NSW:           %[[VAL_11:.*]] = arith.cmpi slt, %[[VAL_2]], %[[VAL_8]] : index
+// NSW:           %[[VAL_12:.*]] = arith.cmpi sle, %[[VAL_1]], %[[VAL_5]] : index
+// NSW:           %[[VAL_13:.*]] = arith.andi %[[VAL_9]], %[[VAL_10]] : i1
+// NSW:           %[[VAL_14:.*]] = arith.andi %[[VAL_11]], %[[VAL_12]] : i1
+// NSW:           %[[VAL_15:.*]] = arith.ori %[[VAL_13]], %[[VAL_14]] : i1
+// NSW:           %[[VAL_16:.*]] = arith.andi %[[VAL_6]], %[[VAL_15]] : i1
+// NSW:           cond_br %[[VAL_16]], ^bb2, ^bb3
+// NSW:         ^bb2:
+// NSW:           %[[VAL_17:.*]] = fir.call @f2() : () -> i1
+// NSW:           %[[VAL_18:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] overflow<nsw> : index
+// NSW:           br ^bb1(%[[VAL_18]], %[[VAL_6]], %[[VAL_17]] : index, i1, i1)
+// NSW:         ^bb3:
+// NSW:           %[[VAL_19:.*]] = arith.andi %[[VAL_6]], %[[VAL_7]] : i1
+// NSW:           return %[[VAL_19]] : i1
+// NSW:         }
+// NSW:         func private @f4(i32) -> i1
+
 // -----
 
 // do_loop that returns the final value of the induction
@@ -191,7 +291,7 @@ func.func @x4(%lo : index, %up : index) -> index {
 // CHECK:         ^bb2:
 // CHECK:           %[[VAL_10:.*]] = fir.convert %[[VAL_6]] : (index) -> i32
 // CHECK:           %[[VAL_11:.*]] = fir.call @f4(%[[VAL_10]]) : (i32) -> i1
-// CHECK:           %[[VAL_12:.*]] = arith.addi %[[VAL_6]], %[[VAL_2]] overflow<nsw> : index
+// CHECK:           %[[VAL_12:.*]] = arith.addi %[[VAL_6]], %[[VAL_2]] : index
 // CHECK:           %[[VAL_13:.*]] = arith.constant 1 : index
 // CHECK:           %[[VAL_14:.*]] = arith.subi %[[VAL_7]], %[[VAL_13]] : index
 // CHECK:           br ^bb1(%[[VAL_12]], %[[VAL_14]] : index, index)
@@ -199,6 +299,29 @@ func.func @x4(%lo : index, %up : index) -> index {
 // CHECK:           return %[[VAL_6]] : index
 // CHECK:         }
 
+// NSW-LABEL:   func @x4(
+// NSW-SAME:             %[[VAL_0:.*]]: index,
+// NSW-SAME:             %[[VAL_1:.*]]: index) -> index {
+// NSW:           %[[VAL_2:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_3:.*]] = arith.subi %[[VAL_1]], %[[VAL_0]] : index
+// NSW:           %[[VAL_4:.*]] = arith.addi %[[VAL_3]], %[[VAL_2]] : index
+// NSW:           %[[VAL_5:.*]] = arith.divsi %[[VAL_4]], %[[VAL_2]] : index
+// NSW:           br ^bb1(%[[VAL_0]], %[[VAL_5]] : index, index)
+// NSW:         ^bb1(%[[VAL_6:.*]]: index, %[[VAL_7:.*]]: index):
+// NSW:           %[[VAL_8:.*]] = arith.constant 0 : index
+// NSW:           %[[VAL_9:.*]] = arith.cmpi sgt, %[[VAL_7]], %[[VAL_8]] : index
+// NSW:           cond_br %[[VAL_9]], ^bb2, ^bb3
+// NSW:         ^bb2:
+// NSW:           %[[VAL_10:.*]] = fir.convert %[[VAL_6]] : (index) -> i32
+// NSW:           %[[VAL_11:.*]] = fir.call @f4(%[[VAL_10]]) : (i32) -> i1
+// NSW:           %[[VAL_12:.*]] = arith.addi %[[VAL_6]], %[[VAL_2]] overflow<nsw> : index
+// NSW:           %[[VAL_13:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_14:.*]] = arith.subi %[[VAL_7]], %[[VAL_13]] : index
+// NSW:           br ^bb1(%[[VAL_12]], %[[VAL_14]] : index, index)
+// NSW:         ^bb3:
+// NSW:           return %[[VAL_6]] : index
+// NSW:         }
+
 // -----
 
 // iterate_while that returns the final value of both inductions
@@ -233,12 +356,38 @@ func.func @y4(%lo : index, %up : index) -> index {
 // CHECK:         ^bb2:
 // CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_4]] : (index) -> i32
 // CHECK:           %[[VAL_16:.*]] = fir.call @f4(%[[VAL_15]]) : (i32) -> i1
-// CHECK:           %[[VAL_17:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] overflow<nsw> : index
+// CHECK:           %[[VAL_17:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] : index
 // CHECK:           br ^bb1(%[[VAL_17]], %[[VAL_16]] : index, i1)
 // CHECK:         ^bb3:
 // CHECK:           return %[[VAL_4]] : index
 // CHECK:         }
 
+// NSW-LABEL:   func @y4(
+// NSW-SAME:             %[[VAL_0:.*]]: index,
+// NSW-SAME:             %[[VAL_1:.*]]: index) -> index {
+// NSW:           %[[VAL_2:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_3:.*]] = arith.constant true
+// NSW:           br ^bb1(%[[VAL_0]], %[[VAL_3]] : index, i1)
+// NSW:         ^bb1(%[[VAL_4:.*]]: index, %[[VAL_5:.*]]: i1):
+// NSW:           %[[VAL_6:.*]] = arith.constant 0 : index
+// NSW:           %[[VAL_7:.*]] = arith.cmpi slt, %[[VAL_6]], %[[VAL_2]] : index
+// NSW:           %[[VAL_8:.*]] = arith.cmpi sle, %[[VAL_4]], %[[VAL_1]] : index
+// NSW:           %[[VAL_9:.*]] = arith.cmpi slt, %[[VAL_2]], %[[VAL_6]] : index
+// NSW:           %[[VAL_10:.*]] = arith.cmpi sle, %[[VAL_1]], %[[VAL_4]] : index
+// NSW:           %[[VAL_11:.*]] = arith.andi %[[VAL_7]], %[[VAL_8]] : i1
+// NSW:           %[[VAL_12:.*]] = arith.andi %[[VAL_9]], %[[VAL_10]] : i1
+// NSW:           %[[VAL_13:.*]] = arith.ori %[[VAL_11]], %[[VAL_12]] : i1
+// NSW:           %[[VAL_14:.*]] = arith.andi %[[VAL_5]], %[[VAL_13]] : i1
+// NSW:           cond_br %[[VAL_14]], ^bb2, ^bb3
+// NSW:         ^bb2:
+// NSW:           %[[VAL_15:.*]] = fir.convert %[[VAL_4]] : (index) -> i32
+// NSW:           %[[VAL_16:.*]] = fir.call @f4(%[[VAL_15]]) : (i32) -> i1
+// NSW:           %[[VAL_17:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] overflow<nsw> : index
+// NSW:           br ^bb1(%[[VAL_17]], %[[VAL_16]] : index, i1)
+// NSW:         ^bb3:
+// NSW:           return %[[VAL_4]] : index
+// NSW:         }
+
 // -----
 
 // do_loop that returns the final induction value
@@ -271,7 +420,7 @@ func.func @x5(%lo : index, %up : index) -> index {
 // CHECK:         ^bb2:
 // CHECK:           %[[VAL_12:.*]] = fir.call @f2() : () -> i1
 // CHECK:           %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> i16
-// CHECK:           %[[VAL_14:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] overflow<nsw> : index
+// CHECK:           %[[VAL_14:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] : index
 // CHECK:           %[[VAL_15:.*]] = arith.constant 1 : index
 // CHECK:           %[[VAL_16:.*]] = arith.subi %[[VAL_9]], %[[VAL_15]] : index
 // CHECK:           br ^bb1(%[[VAL_14]], %[[VAL_13]], %[[VAL_16]] : index, i16, index)
@@ -280,6 +429,31 @@ func.func @x5(%lo : index, %up : index) -> index {
 // CHECK:           return %[[VAL_7]] : index
 // CHECK:         }
 
+// NSW-LABEL:   func @x5(
+// NSW-SAME:             %[[VAL_0:.*]]: index,
+// NSW-SAME:             %[[VAL_1:.*]]: index) -> index {
+// NSW:           %[[VAL_2:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_3:.*]] = arith.constant 42 : i16
+// NSW:           %[[VAL_4:.*]] = arith.subi %[[VAL_1]], %[[VAL_0]] : index
+// NSW:           %[[VAL_5:.*]] = arith.addi %[[VAL_4]], %[[VAL_2]] : index
+// NSW:           %[[VAL_6:.*]] = arith.divsi %[[VAL_5]], %[[VAL_2]] : index
+// NSW:           br ^bb1(%[[VAL_0]], %[[VAL_3]], %[[VAL_6]] : index, i16, index)
+// NSW:         ^bb1(%[[VAL_7:.*]]: index, %[[VAL_8:.*]]: i16, %[[VAL_9:.*]]: index):
+// NSW:           %[[VAL_10:.*]] = arith.constant 0 : index
+// NSW:           %[[VAL_11:.*]] = arith.cmpi sgt, %[[VAL_9]], %[[VAL_10]] : index
+// NSW:           cond_br %[[VAL_11]], ^bb2, ^bb3
+// NSW:         ^bb2:
+// NSW:           %[[VAL_12:.*]] = fir.call @f2() : () -> i1
+// NSW:           %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i1) -> i16
+// NSW:           %[[VAL_14:.*]] = arith.addi %[[VAL_7]], %[[VAL_2]] overflow<nsw> : index
+// NSW:           %[[VAL_15:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_16:.*]] = arith.subi %[[VAL_9]], %[[VAL_15]] : index
+// NSW:           br ^bb1(%[[VAL_14]], %[[VAL_13]], %[[VAL_16]] : index, i16, index)
+// NSW:         ^bb3:
+// NSW:           fir.call @f3(%[[VAL_8]]) : (i16) -> ()
+// NSW:           return %[[VAL_7]] : index
+// NSW:         }
+
 // -----
 
 // iterate_while that returns the both induction values
@@ -322,7 +496,7 @@ func.func @y5(%lo : index, %up : index) -> index {
 // CHECK:         ^bb2:
 // CHECK:           %[[VAL_17:.*]] = fir.call @f2() : () -> i1
 // CHECK:           %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i1) -> i16
-// CHECK:           %[[VAL_19:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] overflow<nsw> : index
+// CHECK:           %[[VAL_19:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] : index
 // CHECK:           br ^bb1(%[[VAL_19]], %[[VAL_17]], %[[VAL_18]] : index, i1, i16)
 // CHECK:         ^bb3:
 // CHECK:           cond_br %[[VAL_6]], ^bb4, ^bb5
@@ -334,3 +508,37 @@ func.func @y5(%lo : index, %up : index) -> index {
 // CHECK:           fir.call @f3(%[[VAL_7]]) : (i16) -> ()
 // CHECK:           return %[[VAL_5]] : index
 // CHECK:         }
+
+// NSW-LABEL:   func @y5(
+// NSW-SAME:             %[[VAL_0:.*]]: index,
+// NSW-SAME:             %[[VAL_1:.*]]: index) -> index {
+// NSW:           %[[VAL_2:.*]] = arith.constant 1 : index
+// NSW:           %[[VAL_3:.*]] = arith.constant 42 : i16
+// NSW:           %[[VAL_4:.*]] = arith.constant true
+// NSW:           br ^bb1(%[[VAL_0]], %[[VAL_4]], %[[VAL_3]] : index, i1, i16)
+// NSW:         ^bb1(%[[VAL_5:.*]]: index, %[[VAL_6:.*]]: i1, %[[VAL_7:.*]]: i16):
+// NSW:           %[[VAL_8:.*]] = arith.constant 0 : index
+// NSW:           %[[VAL_9:.*]] = arith.cmpi slt, %[[VAL_8]], %[[VAL_2]] : index
+// NSW:           %[[VAL_10:.*]] = arith.cmpi sle, %[[VAL_5]], %[[VAL_1]] : index
+// NSW:           %[[VAL_11:.*]] = arith.cmpi slt, %[[VAL_2]], %[[VAL_8]] : index
+// NSW:           %[[VAL_12:.*]] = arith.cmpi sle, %[[VAL_1]], %[[VAL_5]] : index
+// NSW:           %[[VAL_13:.*]] = arith.andi %[[VAL_9]], %[[VAL_10]] : i1
+// NSW:           %[[VAL_14:.*]] = arith.andi %[[VAL_11]], %[[VAL_12]] : i1
+// NSW:           %[[VAL_15:.*]] = arith.ori %[[VAL_13]], %[[VAL_14]] : i1
+// NSW:           %[[VAL_16:.*]] = arith.andi %[[VAL_6]], %[[VAL_15]] : i1
+// NSW:           cond_br %[[VAL_16]], ^bb2, ^bb3
+// NSW:         ^bb2:
+// NSW:           %[[VAL_17:.*]] = fir.call @f2() : () -> i1
+// NSW:           %[[VAL_18:.*]] = fir.convert %[[VAL_17]] : (i1) -> i16
+// NSW:           %[[VAL_19:.*]] = arith.addi %[[VAL_5]], %[[VAL_2]] overflow<nsw> : index
+// NSW:           br ^bb1(%[[VAL_19]], %[[VAL_17]], %[[VAL_18]] : index, i1, i16)
+// NSW:         ^bb3:
+// NSW:           cond_br %[[VAL_6]], ^bb4, ^bb5
+// NSW:         ^bb4:
+// NSW:           %[[VAL_20:.*]] = arith.constant 0 : i32
+// NSW:           %[[VAL_21:.*]] = fir.call @f4(%[[VAL_20]]) : (i32) -> i1
+// NSW:           br ^bb5
+// NSW:         ^bb5:
+// NSW:           fir.call @f3(%[[VAL_7]]) : (i16) -> ()
+// NSW:           return %[[VAL_5]] : index
+// NSW:         }
diff --git a/flang/test/Fir/loop02.fir b/flang/test/Fir/loop02.fir
index fb209a9dfeb4269..50948e0e7aa6b53 100644
--- a/flang/test/Fir/loop02.fir
+++ b/flang/test/Fir/loop02.fir
@@ -31,7 +31,7 @@ func.func private @y(%addr : !fir.ref<index>)
 // CHECK:           cond_br %[[VAL_13]], ^bb2, ^bb3
 // CHECK:         ^bb2:
 // CHECK:           fir.call @y(%[[VAL_0]]) : (!fir.ref<index>) -> ()
-// CHECK:           %[[VAL_14:.*]] = arith.addi %[[VAL_10]], %[[VAL_2]] overflow<nsw> : index
+// CHECK:           %[[VAL_14:.*]] = arith.addi %[[VAL_10]], %[[VAL_2]] : index
 // CHECK:           %[[VAL_15:.*]] = arith.constant 1 : index
 // CHECK:           %[[VAL_16:.*]] = arith.subi %[[VAL_11]], %[[VAL_15]] : index
 // CHECK:           br ^bb1(%[[VAL_14]], %[[VAL_16]] : index, index)
@@ -54,7 +54,7 @@ func.func private @y(%addr : !fir.ref<index>)
 // NOOPT:           cond_br %[[VAL_9]], ^bb2, ^bb3
 // NOOPT:         ^bb2:
 // NOOPT:           fir.call @y(%[[VAL_0]]) : (!fir.ref<index>) -> ()
-// NOOPT:           %[[VAL_10:.*]] = arith.addi %[[VAL_6]], %[[VAL_2]] overflow<nsw> : index
+// NOOPT:           %[[VAL_10:.*]] = arith.addi %[[VAL_6]], %[[VAL_2]] : index
 // NOOPT:           %[[VAL_11:.*]] = arith.constant 1 : index
 // NOOPT:           %[[VAL_12:.*]] = arith.subi %[[VAL_7]], %[[VAL_11]] : index
 // NOOPT:           br ^bb1(%[[VAL_10]], %[[VAL_12]] : index, index)
diff --git a/flang/test/Lower/HLFIR/goto-do-body.f90 b/flang/test/Lower/HLFIR/goto-do-body.f90
index 5f5b09ccb8f7dcc..383b839e591e33d 100644
--- a/flang/test/Lower/HLFIR/goto-do-body.f90
+++ b/flang/test/Lower/HLFIR/goto-do-body.f90
@@ -40,7 +40,7 @@ subroutine sub1()
 ! CHECK:    %[[TMP5:.*]] = arith.subi %[[TMP4]], %[[C1]] : i32
 ! CHECK:    fir.store %[[TMP5]] to %[[TRIP]] : !fir.ref<i32>
 ! CHECK:    %[[TMP6:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
-! CHECK:    %[[TMP7:.*]] = arith.addi %[[TMP6]], %[[C1]] overflow<nsw> : i32
+! CHECK:    %[[TMP7:.*]] = arith.addi %[[TMP6]], %[[C1]] : i32
 ! CHECK:    fir.store %[[TMP7]] to %[[I]]#1 : !fir.ref<i32>
 ! CHECK:    cf.br ^[[HEADER]]
   end do
@@ -104,7 +104,7 @@ subroutine sub2()
 ! CHECK:    fir.store %[[TMP9]] to %[[TRIP]] : !fir.ref<i32>
 ! CHECK:    %[[TMP10:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
 ! CHECK:    %[[STEP_VAL:.*]] = fir.load %[[STEP_VAR]] : !fir.ref<i32>
-! CHECK:    %[[TMP11:.*]] = arith.addi %[[TMP10]], %[[STEP_VAL]] overflow<nsw> : i32
+! CHECK:    %[[TMP11:.*]] = arith.addi %[[TMP10]], %[[STEP_VAL]] : i32
 ! CHECK:    fir.store %[[TMP11]] to %[[I]]#1 : !fir.ref<i32>
 ! CHECK:    cf.br ^[[HEADER]]
   end do
diff --git a/flang/test/Lower/OpenMP/Todo/depend-clause.f90 b/flang/test/Lower/OpenMP/Todo/depend-clause.f90
new file mode 100644
index 000000000000000..74525888c91d6da
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/depend-clause.f90
@@ -0,0 +1,10 @@
+!RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
+!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s
+
+!CHECK: Support for iterator modifiers is not implemented yet
+subroutine f00(x)
+  integer :: x(10)
+  !$omp task depend(iterator(i = 1:10), in: x(i))
+  x = 0
+  !$omp end task
+end
diff --git a/flang/test/Lower/OpenMP/Todo/scope.f90 b/flang/test/Lower/OpenMP/Todo/scope.f90
new file mode 100644
index 000000000000000..16a067dc8f256be
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/scope.f90
@@ -0,0 +1,13 @@
+! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s -fopenmp-version=51 2>&1 | FileCheck %s
+! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s -fopenmp-version=51 2>&1 | FileCheck %s
+
+! CHECK: not yet implemented: Scope construct
+program omp_scope
+  integer i
+  i = 10
+
+  !$omp scope private(i)
+  print *, "omp scope", i
+  !$omp end scope
+
+end program omp_scope
diff --git a/flang/test/Lower/OpenMP/master_taskloop.f90 b/flang/test/Lower/OpenMP/master_taskloop.f90
new file mode 100644
index 000000000000000..26f664b2662dcb0
--- /dev/null
+++ b/flang/test/Lower/OpenMP/master_taskloop.f90
@@ -0,0 +1,14 @@
+! This test checks lowering of OpenMP master taskloop Directive.
+
+! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+subroutine test_master_taskloop
+  integer :: i, j = 1
+  !CHECK: not yet implemented: Taskloop construct
+  !$omp master taskloop
+  do i=1,10
+   j = j + 1
+  end do
+  !$omp end master taskloop 
+end subroutine
diff --git a/flang/test/Lower/OpenMP/master_taskloop_simd.f90 b/flang/test/Lower/OpenMP/master_taskloop_simd.f90
new file mode 100644
index 000000000000000..e928afd65244a4d
--- /dev/null
+++ b/flang/test/Lower/OpenMP/master_taskloop_simd.f90
@@ -0,0 +1,14 @@
+! This test checks lowering of OpenMP master taskloop simd Directive.
+
+! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+subroutine test_master_taskloop_simd()
+  integer :: i, j = 1
+  !CHECK: not yet implemented: Composite TASKLOOP SIMD
+  !$omp master taskloop simd 
+  do i=1,10
+   j = j + 1
+  end do
+  !$omp end master taskloop simd
+end subroutine
diff --git a/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90 b/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90
new file mode 100644
index 000000000000000..086ed01d16d364d
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90
@@ -0,0 +1,14 @@
+! This test checks lowering of OpenMP parallel master taskloop simd Directive.
+
+! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+subroutine test_parallel_master_taskloop_simd
+  integer :: i, j = 1
+  !CHECK: not yet implemented: Composite TASKLOOP SIMD
+  !$omp parallel master taskloop simd 
+  do i=1,10
+   j = j + 1
+  end do
+  !$omp end parallel master taskloop simd
+end subroutine
diff --git a/flang/test/Lower/OpenMP/parallel-master-taskloop.f90 b/flang/test/Lower/OpenMP/parallel-master-taskloop.f90
new file mode 100644
index 000000000000000..17ceb9496c8d342
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-master-taskloop.f90
@@ -0,0 +1,14 @@
+! This test checks lowering of OpenMP parallel master taskloop Directive.
+
+! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+subroutine test_parallel_master_taskloop
+  integer :: i, j = 1
+  !CHECK: not yet implemented: Taskloop construct
+  !$omp parallel master taskloop
+  do i=1,10
+   j = j + 1
+  end do
+  !$omp end parallel master taskloop 
+end subroutine
diff --git a/flang/test/Lower/OpenMP/parallel-master.f90 b/flang/test/Lower/OpenMP/parallel-master.f90
new file mode 100644
index 000000000000000..8f3ee31b328537e
--- /dev/null
+++ b/flang/test/Lower/OpenMP/parallel-master.f90
@@ -0,0 +1,16 @@
+! This test checks lowering of the parallel master combined construct.
+
+! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s
+! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func @_QPparallel_master
+subroutine parallel_master(x)
+  integer :: x
+  !CHECK: omp.parallel {
+  !CHECK: omp.master {
+  !$omp parallel master
+  x = 1
+  !$omp end parallel master
+  !CHECK: }
+  !CHECK: }
+end subroutine parallel_master
diff --git a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90
index 99323e69113bcc6..5e76e8ff1663bf7 100644
--- a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90
+++ b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90
@@ -82,10 +82,10 @@
 ! CHECK:                 %[[VAL_15:.*]] = fir.load %[[PRIV_J_DECL]]#0 : !fir.ref<i32>
 ! CHECK:                 %[[VAL_16:.*]] = arith.addi %[[LOAD]], %[[VAL_15]] : i32
 ! CHECK:                 hlfir.assign %[[VAL_16]] to %[[PRIV_X_DECL]]#0 : i32, !fir.ref<i32>
-! CHECK:                 %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_11]] overflow<nsw> : index
+! CHECK:                 %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_11]] : index
 ! CHECK:                 %[[STEPCAST:.*]] = fir.convert %[[VAL_11]] : (index) -> i32
 ! CHECK:                 %[[IVLOAD:.*]] = fir.load %[[PRIV_J_DECL]]#1 : !fir.ref<i32>
-! CHECK:                 %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]] overflow<nsw> :
+! CHECK:                 %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]]
 ! CHECK:                 fir.result %[[VAL_17]], %[[IVINC]] : index, i32
 ! CHECK:               }
 ! CHECK:               fir.store %[[VAL_12]]#1 to %[[PRIV_J_DECL]]#1 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
index ce45d09d77a22a1..a49eba69ff38cce 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
@@ -206,10 +206,10 @@ program reduce15
 ! CHECK:             %[[VAL_48:.*]] = fir.convert %[[VAL_47]] : (i32) -> i64
 ! CHECK:             %[[VAL_49:.*]] = hlfir.designate %[[VAL_46]] (%[[VAL_48]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, i64) -> !fir.ref<i32>
 ! CHECK:             hlfir.assign %[[VAL_45]] to %[[VAL_49]] : i32, !fir.ref<i32>
-! CHECK:             %[[VAL_50:.*]] = arith.addi %[[VAL_43]], %[[VAL_40]] overflow<nsw> : index
+! CHECK:             %[[VAL_50:.*]] = arith.addi %[[VAL_43]], %[[VAL_40]] : index
 ! CHECK:             %[[VAL_51:.*]] = fir.convert %[[VAL_40]] : (index) -> i32
 ! CHECK:             %[[VAL_52:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref<i32>
-! CHECK:             %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_51]] overflow<nsw> : i32
+! CHECK:             %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_51]] : i32
 ! CHECK:             fir.result %[[VAL_50]], %[[VAL_53]] : index, i32
 ! CHECK:           }
 ! CHECK:           fir.store %[[VAL_54:.*]]#1 to %[[VAL_3]]#1 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-variable.f90 b/flang/test/Lower/OpenMP/wsloop-variable.f90
index cc77ce754d97e9c..8d235c10fa1d602 100644
--- a/flang/test/Lower/OpenMP/wsloop-variable.f90
+++ b/flang/test/Lower/OpenMP/wsloop-variable.f90
@@ -150,10 +150,10 @@ subroutine wsloop_variable_sub
 !CHECK:                 %[[VAL_42:.*]] = arith.addi %[[VAL_40]], %[[VAL_41]] : i64
 !CHECK:                 %[[VAL_43:.*]] = fir.convert %[[VAL_42]] : (i64) -> f32
 !CHECK:                 hlfir.assign %[[VAL_43]] to %[[VAL_21]]#0 : f32, !fir.ref<f32>
-!CHECK:                 %[[VAL_44:.*]] = arith.addi %[[VAL_37]], %[[VAL_34]] overflow<nsw> : index
+!CHECK:                 %[[VAL_44:.*]] = arith.addi %[[VAL_37]], %[[VAL_34]] : index
 !CHECK:                 %[[VAL_45:.*]] = fir.convert %[[VAL_34]] : (index) -> i64
 !CHECK:                 %[[VAL_46:.*]] = fir.load %[[VAL_17]]#1 : !fir.ref<i64>
-!CHECK:                 %[[VAL_47:.*]] = arith.addi %[[VAL_46]], %[[VAL_45]] overflow<nsw> : i64
+!CHECK:                 %[[VAL_47:.*]] = arith.addi %[[VAL_46]], %[[VAL_45]] : i64
 !CHECK:                 fir.result %[[VAL_44]], %[[VAL_47]] : index, i64
 !CHECK:               }
 !CHECK:               fir.store %[[VAL_48:.*]]#1 to %[[VAL_17]]#1 : !fir.ref<i64>
diff --git a/flang/test/Lower/array-character.f90 b/flang/test/Lower/array-character.f90
index 53adc5c02958c33..c93ef4be30823cd 100644
--- a/flang/test/Lower/array-character.f90
+++ b/flang/test/Lower/array-character.f90
@@ -1,4 +1,4 @@
-! RUN: bbc -hlfir=false -fwrapv %s -o - | fir-opt --canonicalize --cse | FileCheck %s
+! RUN: bbc -hlfir=false %s -o - | fir-opt --canonicalize --cse | FileCheck %s
 
 ! CHECK-LABEL: func @_QPissue(
 ! CHECK-SAME:    %[[VAL_0:.*]]: !fir.boxchar<1>{{.*}}, %[[VAL_1:.*]]: !fir.boxchar<1>{{.*}}) {
diff --git a/flang/test/Lower/array-derived-assignments.f90 b/flang/test/Lower/array-derived-assignments.f90
index f4e51271d593689..71e61f651302a37 100644
--- a/flang/test/Lower/array-derived-assignments.f90
+++ b/flang/test/Lower/array-derived-assignments.f90
@@ -1,5 +1,5 @@
 ! Test derived type assignment lowering inside array expression
-! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck %s
+! RUN: bbc -hlfir=false %s -o - | FileCheck %s
 
 module array_derived_assign
   type simple_copy
diff --git a/flang/test/Lower/array-derived.f90 b/flang/test/Lower/array-derived.f90
index a0c55f5d88255f8..b5eb7621c90f109 100644
--- a/flang/test/Lower/array-derived.f90
+++ b/flang/test/Lower/array-derived.f90
@@ -1,4 +1,4 @@
-! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck %s
+! RUN: bbc -hlfir=false %s -o - | FileCheck %s
 
 module cs
   type r
diff --git a/flang/test/Lower/array-elemental-calls-char-byval.f90 b/flang/test/Lower/array-elemental-calls-char-byval.f90
index 682191fc7895620..c321614e7fc5b53 100644
--- a/flang/test/Lower/array-elemental-calls-char-byval.f90
+++ b/flang/test/Lower/array-elemental-calls-char-byval.f90
@@ -1,6 +1,6 @@
 ! Test lowering of elemental calls with character argument
 ! with the VALUE attribute.
-! RUN: bbc -hlfir=false -fwrapv -o - %s | FileCheck %s
+! RUN: bbc -hlfir=false -o - %s | FileCheck %s
 
 
 module char_elem_byval
diff --git a/flang/test/Lower/array-elemental-calls-char.f90 b/flang/test/Lower/array-elemental-calls-char.f90
index 00e2f8e8f9c1270..603cc677805fc9c 100644
--- a/flang/test/Lower/array-elemental-calls-char.f90
+++ b/flang/test/Lower/array-elemental-calls-char.f90
@@ -1,6 +1,6 @@
 ! Test lowering of elemental calls with character argument
 ! without the VALUE attribute.
-! RUN: bbc -hlfir=false -fwrapv -o - %s | FileCheck %s
+! RUN: bbc -hlfir=false -o - %s | FileCheck %s
 
 module char_elem
 
diff --git a/flang/test/Lower/array-expression-assumed-size.f90 b/flang/test/Lower/array-expression-assumed-size.f90
index 2fbf315aff11485..ae35da951538b8c 100644
--- a/flang/test/Lower/array-expression-assumed-size.f90
+++ b/flang/test/Lower/array-expression-assumed-size.f90
@@ -1,5 +1,5 @@
 ! RUN: bbc --emit-fir -hlfir=false %s -o - | FileCheck %s
-! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck --check-prefix=PostOpt %s
+! RUN: bbc -hlfir=false %s -o - | FileCheck --check-prefix=PostOpt %s
 
 
 subroutine assumed_size_test(a)
diff --git a/flang/test/Lower/array-expression-slice-1.f90 b/flang/test/Lower/array-expression-slice-1.f90
index b597814bc0d9f14..152450902432901 100644
--- a/flang/test/Lower/array-expression-slice-1.f90
+++ b/flang/test/Lower/array-expression-slice-1.f90
@@ -1,4 +1,4 @@
-! RUN: bbc -hlfir=false -fwrapv -o - --outline-intrinsics %s | FileCheck %s
+! RUN: bbc -hlfir=false -o - --outline-intrinsics %s | FileCheck %s
 
 ! CHECK-LABEL: func @_QQmain() attributes {fir.bindc_name = "p"} {
 ! CHECK-DAG:         %[[VAL_0:.*]] = arith.constant 10 : index
diff --git a/flang/test/Lower/array-substring.f90 b/flang/test/Lower/array-substring.f90
index 02101039120e9fc..2e283997e3e003a 100644
--- a/flang/test/Lower/array-substring.f90
+++ b/flang/test/Lower/array-substring.f90
@@ -1,7 +1,5 @@
 ! RUN: bbc -hlfir=false %s -o - | FileCheck %s
-! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck %s --check-prefix=NO-NSW
-
-! NO-NSW-NOT: overflow<nsw>
+! RUN: bbc -hlfir=false -integer-overflow %s -o - | FileCheck %s --check-prefix=NSW
 
 ! CHECK-LABEL: func @_QPtest(
 ! CHECK-SAME:     %[[VAL_0:.*]]: !fir.boxchar<1>{{.*}}) -> !fir.array<1x!fir.logical<4>> {
@@ -34,9 +32,8 @@
 ! CHECK:         %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
 ! CHECK:         %[[VAL_27:.*]] = fir.array_coor %[[VAL_8]](%[[VAL_9]]) %[[VAL_15]] : (!fir.ref<!fir.array<1x!fir.logical<4>>>, !fir.shape<1>, index) -> !fir.ref<!fir.logical<4>>
 ! CHECK:         fir.store %[[VAL_26]] to %[[VAL_27]] : !fir.ref<!fir.logical<4>>
-! CHECK:         %[[VAL_15_NSW:.*]] = arith.addi %[[VAL_12]], %[[VAL_1]] overflow<nsw> : index
 ! CHECK:         %[[VAL_28:.*]] = arith.subi %[[VAL_13]], %[[VAL_1]] : index
-! CHECK:         br ^bb1(%[[VAL_15_NSW]], %[[VAL_28]] : index, index)
+! CHECK:         br ^bb1(%[[VAL_15]], %[[VAL_28]] : index, index)
 ! CHECK:       ^bb3:
 ! CHECK:         %[[VAL_29:.*]] = fir.load %[[VAL_8]] : !fir.ref<!fir.array<1x!fir.logical<4>>>
 ! CHECK:         return %[[VAL_29]] : !fir.array<1x!fir.logical<4>>
@@ -49,3 +46,42 @@ function test(C)
 
   test = C(1:1)(1:8) == (/'ABCDabcd'/) 
 end function test
+
+! NSW-LABEL: func @_QPtest(
+! NSW-SAME:     %[[VAL_0:.*]]: !fir.boxchar<1>{{.*}}) -> !fir.array<1x!fir.logical<4>> {
+! NSW-DAG:         %[[VAL_1:.*]] = arith.constant 1 : index
+! NSW-DAG:         %[[VAL_2:.*]] = arith.constant 0 : index
+! NSW-DAG:         %[[VAL_3:.*]] = arith.constant 0 : i32
+! NSW-DAG:         %[[VAL_4:.*]] = arith.constant 8 : index
+! NSW:         %[[VAL_6:.*]]:2 = fir.unboxchar %[[VAL_0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! NSW:         %[[VAL_7:.*]] = fir.convert %[[VAL_6]]#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<1x!fir.char<1,12>>>
+! NSW:         %[[VAL_8:.*]] = fir.alloca !fir.array<1x!fir.logical<4>> {bindc_name = "test", uniq_name = "_QFtestEtest"}
+! NSW:         %[[VAL_9:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+! NSW:         %[[VAL_10:.*]] = fir.slice %[[VAL_1]], %[[VAL_1]], %[[VAL_1]] : (index, index, index) -> !fir.slice<1>
+! NSW:         %[[VAL_11:.*]] = fir.address_of(@_QQ{{.*}}) : !fir.ref<!fir.array<1x!fir.char<1,8>>>
+! NSW:         br ^bb1(%[[VAL_2]], %[[VAL_1]] : index, index)
+! NSW:       ^bb1(%[[VAL_12:.*]]: index, %[[VAL_13:.*]]: index):
+! NSW:         %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_13]], %[[VAL_2]] : index
+! NSW:         cond_br %[[VAL_14]], ^bb2, ^bb3
+! NSW:       ^bb2:
+! NSW:         %[[VAL_15:.*]] = arith.addi %[[VAL_12]], %[[VAL_1]] : index
+! NSW:         %[[VAL_16:.*]] = fir.array_coor %[[VAL_7]](%[[VAL_9]]) {{\[}}%[[VAL_10]]] %[[VAL_15]] : (!fir.ref<!fir.array<1x!fir.char<1,12>>>, !fir.shape<1>, !fir.slice<1>, index) -> !fir.ref<!fir.char<1,12>>
+! NSW:         %[[VAL_17:.*]] = fir.convert %[[VAL_16]] : (!fir.ref<!fir.char<1,12>>) -> !fir.ref<!fir.array<12x!fir.char<1>>>
+! NSW:         %[[VAL_18:.*]] = fir.coordinate_of %[[VAL_17]], %[[VAL_2]] : (!fir.ref<!fir.array<12x!fir.char<1>>>, index) -> !fir.ref<!fir.char<1>>
+! NSW:         %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (!fir.ref<!fir.char<1>>) -> !fir.ref<!fir.char<1,?>>
+! NSW:         %[[VAL_20:.*]] = fir.array_coor %[[VAL_11]](%[[VAL_9]]) %[[VAL_15]] : (!fir.ref<!fir.array<1x!fir.char<1,8>>>, !fir.shape<1>, index) -> !fir.ref<!fir.char<1,8>>
+! NSW:         %[[VAL_21:.*]] = fir.convert %[[VAL_19]] : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+! NSW:         %[[VAL_22:.*]] = fir.convert %[[VAL_20]] : (!fir.ref<!fir.char<1,8>>) -> !fir.ref<i8>
+! NSW:         %[[VAL_23:.*]] = fir.convert %[[VAL_4]] : (index) -> i64
+! NSW:         %[[VAL_24:.*]] = fir.call @_FortranACharacterCompareScalar1(%[[VAL_21]], %[[VAL_22]], %[[VAL_23]], %[[VAL_23]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i8>, i64, i64) -> i32
+! NSW:         %[[VAL_25:.*]] = arith.cmpi eq, %[[VAL_24]], %[[VAL_3]] : i32
+! NSW:         %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (i1) -> !fir.logical<4>
+! NSW:         %[[VAL_27:.*]] = fir.array_coor %[[VAL_8]](%[[VAL_9]]) %[[VAL_15]] : (!fir.ref<!fir.array<1x!fir.logical<4>>>, !fir.shape<1>, index) -> !fir.ref<!fir.logical<4>>
+! NSW:         fir.store %[[VAL_26]] to %[[VAL_27]] : !fir.ref<!fir.logical<4>>
+! NSW:         %[[VAL_15_NSW:.*]] = arith.addi %[[VAL_12]], %[[VAL_1]] overflow<nsw> : index
+! NSW:         %[[VAL_28:.*]] = arith.subi %[[VAL_13]], %[[VAL_1]] : index
+! NSW:         br ^bb1(%[[VAL_15_NSW]], %[[VAL_28]] : index, index)
+! NSW:       ^bb3:
+! NSW:         %[[VAL_29:.*]] = fir.load %[[VAL_8]] : !fir.ref<!fir.array<1x!fir.logical<4>>>
+! NSW:         return %[[VAL_29]] : !fir.array<1x!fir.logical<4>>
+! NSW:       }
diff --git a/flang/test/Lower/array-temp.f90 b/flang/test/Lower/array-temp.f90
index 718aef84a4e853c..10c5ee91d44bdae 100644
--- a/flang/test/Lower/array-temp.f90
+++ b/flang/test/Lower/array-temp.f90
@@ -1,4 +1,4 @@
-! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck %s
+! RUN: bbc -hlfir=false %s -o - | FileCheck %s
 
 ! CHECK-LABEL: func @_QPss1()
 subroutine ss1
diff --git a/flang/test/Lower/components.f90 b/flang/test/Lower/components.f90
index 28e836c5d10456a..e1582a8a31e0d4f 100644
--- a/flang/test/Lower/components.f90
+++ b/flang/test/Lower/components.f90
@@ -1,4 +1,4 @@
-! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck %s
+! RUN: bbc -hlfir=false %s -o - | FileCheck %s
 
 module components_test
   type t1
diff --git a/flang/test/Lower/do_loop.f90 b/flang/test/Lower/do_loop.f90
index 5d8343b8d68a45a..a46e6c947391b79 100644
--- a/flang/test/Lower/do_loop.f90
+++ b/flang/test/Lower/do_loop.f90
@@ -1,17 +1,17 @@
 ! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false -o - %s | FileCheck %s
 ! RUN: %flang_fc1 -mllvm --use-desc-for-alloc=false -emit-fir -flang-deprecated-no-hlfir -o - %s | FileCheck %s
-! RUN: %flang_fc1 -mllvm --use-desc-for-alloc=false -emit-fir -flang-deprecated-no-hlfir -fwrapv -o - %s | FileCheck %s --check-prefix=NO-NSW
+! RUN: %flang_fc1 -mllvm --use-desc-for-alloc=false -emit-fir -flang-deprecated-no-hlfir -flang-experimental-integer-overflow -o - %s | FileCheck %s --check-prefix=NSW
 
 ! Simple tests for structured ordered loops with loop-control.
 ! Tests the structure of the loop, storage to index variable and return and 
 ! storage of the final value of the index variable.
 
-! NO-NSW-NOT: overflow<nsw>
-
 ! Test a simple loop with the final value of the index variable read outside the loop
 ! CHECK-LABEL: simple_loop
+! NSW-LABEL:   simple_loop
 subroutine simple_loop
   ! CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_loopEi"}
+  ! NSW:   %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_loopEi"}
   integer :: i
 
   ! CHECK: %[[C1:.*]] = arith.constant 1 : i32
@@ -21,14 +21,18 @@ subroutine simple_loop
   ! CHECK: %[[C1:.*]] = arith.constant 1 : index
   ! CHECK: %[[LB:.*]] = fir.convert %[[C1_CVT]] : (index) -> i32
   ! CHECK: %[[LI_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
+  ! NSW:   %[[LI_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
   ! CHECK-SAME: %[[C1_CVT]] to %[[C5_CVT]] step %[[C1]]
   ! CHECK-SAME: iter_args(%[[IV:.*]] = %[[LB]]) -> (index, i32) {
   do i=1,5
   ! CHECK:   fir.store %[[IV]] to %[[I_REF]] : !fir.ref<i32>
-  ! CHECK:   %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[C1]] overflow<nsw> : index
+  ! CHECK:   %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[C1]] : index
+  ! NSW:     %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[C1:.*]] overflow<nsw> : index
   ! CHECK:   %[[STEPCAST:.*]] = fir.convert %[[C1]] : (index) -> i32
   ! CHECK:   %[[IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
-  ! CHECK:   %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]] overflow<nsw> : i32
+  ! NSW:     %[[IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
+  ! CHECK:   %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST]] : i32
+  ! NSW:     %[[IVINC:.*]] = arith.addi %[[IVLOAD]], %[[STEPCAST:.*]] overflow<nsw> : i32
   ! CHECK:  fir.result %[[LI_NEXT]], %[[IVINC]] : index, i32
   ! CHECK: }
   end do
@@ -40,11 +44,14 @@ subroutine simple_loop
 
 ! Test a 2-nested loop with a body composed of a reduction. Values are read from a 2d array.
 ! CHECK-LABEL: nested_loop
+! NSW-LABEL:   nested_loop
 subroutine nested_loop
   ! CHECK: %[[ARR_REF:.*]] = fir.alloca !fir.array<5x5xi32> {bindc_name = "arr", uniq_name = "_QFnested_loopEarr"}
   ! CHECK: %[[ASUM_REF:.*]] = fir.alloca i32 {bindc_name = "asum", uniq_name = "_QFnested_loopEasum"}
   ! CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_loopEi"}
+  ! NSW:   %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_loopEi"}
   ! CHECK: %[[J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_loopEj"}
+  ! NSW:   %[[J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_loopEj"}
   integer :: asum, arr(5,5)
   integer :: i, j
   asum = 0
@@ -55,6 +62,7 @@ subroutine nested_loop
   ! CHECK: %[[ST_I:.*]] = arith.constant 1 : index
   ! CHECK: %[[I_LB:.*]] = fir.convert %[[S_I_CVT]] : (index) -> i32
   ! CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
+  ! NSW:   %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
   ! CHECK-SAME: %[[S_I_CVT]] to %[[E_I_CVT]] step %[[ST_I]]
   ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i32) {
   do i=1,5
@@ -66,6 +74,7 @@ subroutine nested_loop
     ! CHECK: %[[ST_J:.*]] = arith.constant 1 : index
     ! CHECK: %[[J_LB:.*]] = fir.convert %[[S_J_CVT]] : (index) -> i32
     ! CHECK: %[[J_RES:.*]]:2 = fir.do_loop %[[LJ:[^ ]*]] =
+    ! NSW:   %[[J_RES:.*]]:2 = fir.do_loop %[[LJ:[^ ]*]] =
     ! CHECK-SAME: %[[S_J_CVT]] to %[[E_J_CVT]] step %[[ST_J]]
     ! CHECK-SAME: iter_args(%[[J_IV:.*]] = %[[J_LB]]) -> (index, i32) {
     do j=1,5
@@ -84,18 +93,24 @@ subroutine nested_loop
       ! CHECK: %[[ASUM_NEW:.*]] = arith.addi %[[ASUM]], %[[ARR_VAL]] : i32
       ! CHECK: fir.store %[[ASUM_NEW]] to %[[ASUM_REF]] : !fir.ref<i32>
       asum = asum + arr(i,j)
-      ! CHECK: %[[LJ_NEXT:.*]] = arith.addi %[[LJ]], %[[ST_J]] overflow<nsw> : index
+      ! CHECK: %[[LJ_NEXT:.*]] = arith.addi %[[LJ]], %[[ST_J]] : index
+      ! NSW:   %[[LJ_NEXT:.*]] = arith.addi %[[LJ]], %[[ST_J:.*]] overflow<nsw> : index
       ! CHECK: %[[J_STEPCAST:.*]] = fir.convert %[[ST_J]] : (index) -> i32
       ! CHECK: %[[J_IVLOAD:.*]] = fir.load %[[J_REF]] : !fir.ref<i32>
-      ! CHECK: %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST]] overflow<nsw> : i32
+      ! NSW:   %[[J_IVLOAD:.*]] = fir.load %[[J_REF]] : !fir.ref<i32>
+      ! CHECK: %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST]] : i32
+      ! NSW:   %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST:.*]] overflow<nsw> : i32
       ! CHECK: fir.result %[[LJ_NEXT]], %[[J_IVINC]] : index, i32
     ! CHECK: }
     end do
     ! CHECK: fir.store %[[J_RES]]#1 to %[[J_REF]] : !fir.ref<i32>
-    ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_I]] overflow<nsw> : index
+    ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_I]] : index
+    ! NSW:   %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_I:.*]] overflow<nsw> : index
     ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[ST_I]] : (index) -> i32
     ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
-    ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] overflow<nsw> : i32
+    ! NSW:   %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
+    ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32
+    ! NSW:   %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow<nsw> : i32
     ! CHECK: fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i32
   ! CHECK: }
   end do
@@ -104,9 +119,11 @@ subroutine nested_loop
 
 ! Test a downcounting loop
 ! CHECK-LABEL: down_counting_loop
+! NSW-LABEL:   down_counting_loop
 subroutine down_counting_loop()
   integer :: i
   ! CHECK: %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFdown_counting_loopEi"}
+  ! NSW:   %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFdown_counting_loopEi"}
 
   ! CHECK: %[[C5:.*]] = arith.constant 5 : i32
   ! CHECK: %[[C5_CVT:.*]] = fir.convert %[[C5]] : (i32) -> index
@@ -116,14 +133,18 @@ subroutine down_counting_loop()
   ! CHECK: %[[CMINUS1_STEP_CVT:.*]] = fir.convert %[[CMINUS1]] : (i32) -> index
   ! CHECK: %[[I_LB:.*]] = fir.convert %[[C5_CVT]] : (index) -> i32
   ! CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
+  ! NSW:   %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
   ! CHECK-SAME: %[[C5_CVT]] to %[[C1_CVT]] step %[[CMINUS1_STEP_CVT]]
   ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i32) {
   do i=5,1,-1
   ! CHECK: fir.store %[[I_IV]] to %[[I_REF]] : !fir.ref<i32>
-  ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[CMINUS1_STEP_CVT]] overflow<nsw> : index
+  ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[CMINUS1_STEP_CVT]] : index
+  ! NSW:   %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[CMINUS1_STEP_CVT:.*]] overflow<nsw> : index
   ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[CMINUS1_STEP_CVT]] : (index) -> i32
   ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
-  ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] overflow<nsw> : i32
+  ! NSW:   %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
+  ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32
+  ! NSW:   %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow<nsw> : i32
   ! CHECK: fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i32
   ! CHECK: }
   end do
@@ -132,6 +153,7 @@ subroutine down_counting_loop()
 
 ! Test a general loop with a variable step
 ! CHECK-LABEL: loop_with_variable_step
+! NSW-LABEL:   loop_with_variable_step
 ! CHECK-SAME: (%[[S_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "s"}, %[[E_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "e"}, %[[ST_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "st"}) {
 subroutine loop_with_variable_step(s,e,st)
   integer :: s, e, st
@@ -144,14 +166,18 @@ subroutine loop_with_variable_step(s,e,st)
   ! CHECK: %[[ST_CVT:.*]] = fir.convert %[[ST]] : (i32) -> index
   ! CHECK: %[[I_LB:.*]] = fir.convert %[[S_CVT]] : (index) -> i32
   ! CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
+  ! NSW:   %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
   ! CHECK-SAME: %[[S_CVT]] to %[[E_CVT]] step %[[ST_CVT]]
   ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i32) {
   do i=s,e,st
   ! CHECK:  fir.store %[[I_IV]] to %[[I_REF]] : !fir.ref<i32>
-  ! CHECK:  %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] overflow<nsw> : index
+  ! CHECK:  %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] : index
+  ! NSW:    %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT:.*]] overflow<nsw> : index
   ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[ST_CVT]] : (index) -> i32
   ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
-  ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] overflow<nsw> : i32
+  ! NSW:   %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
+  ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32
+  ! NSW:   %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow<nsw> : i32
   ! CHECK:  fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i32
   ! CHECK: }
   end do
@@ -160,11 +186,13 @@ subroutine loop_with_variable_step(s,e,st)
 
 ! Test usage of pointer variables as index, start, end and step variables
 ! CHECK-LABEL: loop_with_pointer_variables
+! NSW-LABEL:   loop_with_pointer_variables
 ! CHECK-SAME: (%[[S_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "s", fir.target}, %[[E_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "e", fir.target}, %[[ST_REF:.*]]: !fir.ref<i32> {fir.bindc_name = "st", fir.target}) {
 subroutine loop_with_pointer_variables(s,e,st)
 ! CHECK:  %[[E_PTR_REF:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFloop_with_pointer_variablesEeptr.addr"}
 ! CHECK:  %[[I_REF:.*]] = fir.alloca i32 {bindc_name = "i", fir.target, uniq_name = "_QFloop_with_pointer_variablesEi"}
 ! CHECK:  %[[I_PTR_REF:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFloop_with_pointer_variablesEiptr.addr"}
+! NSW:    %[[I_PTR_REF:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFloop_with_pointer_variablesEiptr.addr"}
 ! CHECK:  %[[S_PTR_REF:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFloop_with_pointer_variablesEsptr.addr"}
 ! CHECK:  %[[ST_PTR_REF:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFloop_with_pointer_variablesEstptr.addr"}
   integer, target :: i
@@ -185,6 +213,7 @@ subroutine loop_with_pointer_variables(s,e,st)
   stptr => st
 
 ! CHECK:  %[[I_PTR:.*]] = fir.load %[[I_PTR_REF]] : !fir.ref<!fir.ptr<i32>>
+! NSW:    %[[I_PTR:.*]] = fir.load %[[I_PTR_REF]] : !fir.ref<!fir.ptr<i32>>
 ! CHECK:  %[[S_PTR:.*]] = fir.load %[[S_PTR_REF]] : !fir.ref<!fir.ptr<i32>>
 ! CHECK:  %[[S:.*]] = fir.load %[[S_PTR]] : !fir.ptr<i32>
 ! CHECK:  %[[S_CVT:.*]] = fir.convert %[[S]] : (i32) -> index
@@ -196,14 +225,18 @@ subroutine loop_with_pointer_variables(s,e,st)
 ! CHECK:  %[[ST_CVT:.*]] = fir.convert %[[ST]] : (i32) -> index
 ! CHECK:  %[[I_LB:.*]] = fir.convert %[[S_CVT]] : (index) -> i32
 ! CHECK:  %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
+! NSW:    %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
 ! CHECK-SAME: %[[S_CVT]] to %[[E_CVT]] step %[[ST_CVT]]
 ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i32) {
   do iptr=sptr,eptr,stptr
 ! CHECK:    fir.store %[[I_IV]] to %[[I_PTR]] : !fir.ptr<i32>
-! CHECK:    %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] overflow<nsw> : index
+! CHECK:    %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] : index
+! NSW:      %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT:.*]] overflow<nsw> : index
 ! CHECK:    %[[I_STEPCAST:.*]] = fir.convert %[[ST_CVT]] : (index) -> i32
 ! CHECK:    %[[I_IVLOAD:.*]] = fir.load %[[I_PTR]] : !fir.ptr<i32>
-! CHECK:    %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] overflow<nsw> : i32
+! NSW:      %[[I_IVLOAD:.*]] = fir.load %[[I_PTR]] : !fir.ptr<i32>
+! CHECK:    %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32
+! NSW:      %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow<nsw> : i32
 ! CHECK:    fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i32
   end do
 ! CHECK:  }
@@ -212,9 +245,11 @@ subroutine loop_with_pointer_variables(s,e,st)
 
 ! Test usage of non-default integer kind for loop control and loop index variable
 ! CHECK-LABEL: loop_with_non_default_integer
+! NSW-LABEL:   loop_with_non_default_integer
 ! CHECK-SAME: (%[[S_REF:.*]]: !fir.ref<i64> {fir.bindc_name = "s"}, %[[E_REF:.*]]: !fir.ref<i64> {fir.bindc_name = "e"}, %[[ST_REF:.*]]: !fir.ref<i64> {fir.bindc_name = "st"}) {
 subroutine loop_with_non_default_integer(s,e,st)
   ! CHECK: %[[I_REF:.*]] = fir.alloca i64 {bindc_name = "i", uniq_name = "_QFloop_with_non_default_integerEi"}
+  ! NSW:   %[[I_REF:.*]] = fir.alloca i64 {bindc_name = "i", uniq_name = "_QFloop_with_non_default_integerEi"}
   integer(kind=8):: i
   ! CHECK: %[[S:.*]] = fir.load %[[S_REF]] : !fir.ref<i64>
   ! CHECK: %[[S_CVT:.*]] = fir.convert %[[S]] : (i64) -> index
@@ -226,14 +261,18 @@ subroutine loop_with_non_default_integer(s,e,st)
 
   ! CHECK: %[[I_LB:.*]] = fir.convert %[[S_CVT]] : (index) -> i64
   ! CHECK: %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
+  ! NSW:   %[[I_RES:.*]]:2 = fir.do_loop %[[LI:[^ ]*]] =
   ! CHECK-SAME: %[[S_CVT]] to %[[E_CVT]] step %[[ST_CVT]]
   ! CHECK-SAME: iter_args(%[[I_IV:.*]] = %[[I_LB]]) -> (index, i64) {
   do i=s,e,st
     ! CHECK: fir.store %[[I_IV]] to %[[I_REF]] : !fir.ref<i64>
-    ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] overflow<nsw> : index
+    ! CHECK: %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT]] : index
+    ! NSW:   %[[LI_NEXT:.*]] = arith.addi %[[LI]], %[[ST_CVT:.*]] overflow<nsw> : index
     ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[ST_CVT]] : (index) -> i64
     ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i64>
-    ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] overflow<nsw> : i64
+    ! NSW:   %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i64>
+    ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i64
+    ! NSW:   %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST:.*]] overflow<nsw> : i64
     ! CHECK: fir.result %[[LI_NEXT]], %[[I_IVINC]] : index, i64
   end do
   ! CHECK: }
diff --git a/flang/test/Lower/do_loop_unstructured.f90 b/flang/test/Lower/do_loop_unstructured.f90
index d8890b2d0926ead..e1a669e09c9a895 100644
--- a/flang/test/Lower/do_loop_unstructured.f90
+++ b/flang/test/Lower/do_loop_unstructured.f90
@@ -1,11 +1,9 @@
 ! RUN: bbc -emit-fir -hlfir=false -o - %s | FileCheck %s
 ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -o - %s | FileCheck %s
-! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fwrapv -o - %s | FileCheck %s --check-prefix=NO-NSW
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -flang-experimental-integer-overflow -o - %s | FileCheck %s --check-prefix=NSW
 
 ! Tests for unstructured loops.
 
-! NO-NSW-NOT: overflow<nsw>
-
 ! Test a simple unstructured loop. Test for the existence of,
 ! -> The initialization of the trip-count and loop-variable
 ! -> The branch to the body or the exit inside the header
@@ -41,12 +39,42 @@ subroutine simple_unstructured()
 ! CHECK:   fir.store %[[TRIP_VAR_NEXT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
 ! CHECK:   %[[LOOP_VAR:.*]] = fir.load %[[LOOP_VAR_REF]] : !fir.ref<i32>
 ! CHECK:   %[[STEP_ONE_2:.*]] = arith.constant 1 : i32
-! CHECK:   %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_ONE_2]] overflow<nsw> : i32
+! CHECK:   %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_ONE_2]] : i32
 ! CHECK:   fir.store %[[LOOP_VAR_NEXT]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
 ! CHECK:   cf.br ^[[HEADER]]
 ! CHECK: ^[[EXIT]]:
 ! CHECK:   return
 
+! NSW-LABEL: simple_unstructured
+! NSW:   %[[TRIP_VAR_REF:.*]] = fir.alloca i32
+! NSW:   %[[LOOP_VAR_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_unstructuredEi"}
+! NSW:   %[[ONE:.*]] = arith.constant 1 : i32
+! NSW:   %[[HUNDRED:.*]] = arith.constant 100 : i32
+! NSW:   %[[STEP_ONE:.*]] = arith.constant 1 : i32
+! NSW:   %[[TMP1:.*]] = arith.subi %[[HUNDRED]], %[[ONE]] : i32
+! NSW:   %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[STEP_ONE]] : i32
+! NSW:   %[[TRIP_COUNT:.*]] = arith.divsi %[[TMP2]], %[[STEP_ONE]] : i32
+! NSW:   fir.store %[[TRIP_COUNT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   fir.store %[[ONE]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER:.*]]
+! NSW: ^[[HEADER]]:
+! NSW:   %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[ZERO:.*]] = arith.constant 0 : i32
+! NSW:   %[[COND:.*]] = arith.cmpi sgt, %[[TRIP_VAR]], %[[ZERO]] : i32
+! NSW:   cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]]
+! NSW: ^[[BODY]]:
+! NSW:   %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[ONE_1:.*]] = arith.constant 1 : i32
+! NSW:   %[[TRIP_VAR_NEXT:.*]] = arith.subi %[[TRIP_VAR]], %[[ONE_1]] : i32
+! NSW:   fir.store %[[TRIP_VAR_NEXT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[LOOP_VAR:.*]] = fir.load %[[LOOP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[STEP_ONE_2:.*]] = arith.constant 1 : i32
+! NSW:   %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_ONE_2]] overflow<nsw> : i32
+! NSW:   fir.store %[[LOOP_VAR_NEXT]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER]]
+! NSW: ^[[EXIT]]:
+! NSW:   return
+
 ! Test an unstructured loop with a step. Mostly similar to the previous one.
 ! Only difference is a non-unit step.
 subroutine simple_unstructured_with_step()
@@ -80,12 +108,42 @@ subroutine simple_unstructured_with_step()
 ! CHECK:   fir.store %[[TRIP_VAR_NEXT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
 ! CHECK:   %[[LOOP_VAR:.*]] = fir.load %[[LOOP_VAR_REF]] : !fir.ref<i32>
 ! CHECK:   %[[STEP_2:.*]] = arith.constant 2 : i32
-! CHECK:   %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_2]] overflow<nsw> : i32
+! CHECK:   %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_2]] : i32
 ! CHECK:   fir.store %[[LOOP_VAR_NEXT]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
 ! CHECK:   cf.br ^[[HEADER]]
 ! CHECK: ^[[EXIT]]:
 ! CHECK:   return
 
+! NSW-LABEL: simple_unstructured_with_step
+! NSW:   %[[TRIP_VAR_REF:.*]] = fir.alloca i32
+! NSW:   %[[LOOP_VAR_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsimple_unstructured_with_stepEi"}
+! NSW:   %[[ONE:.*]] = arith.constant 1 : i32
+! NSW:   %[[HUNDRED:.*]] = arith.constant 100 : i32
+! NSW:   %[[STEP:.*]] = arith.constant 2 : i32
+! NSW:   %[[TMP1:.*]] = arith.subi %[[HUNDRED]], %[[ONE]] : i32
+! NSW:   %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[STEP]] : i32
+! NSW:   %[[TRIP_COUNT:.*]] = arith.divsi %[[TMP2]], %[[STEP]] : i32
+! NSW:   fir.store %[[TRIP_COUNT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   fir.store %[[ONE]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER:.*]]
+! NSW: ^[[HEADER]]:
+! NSW:   %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[ZERO:.*]] = arith.constant 0 : i32
+! NSW:   %[[COND:.*]] = arith.cmpi sgt, %[[TRIP_VAR]], %[[ZERO]] : i32
+! NSW:   cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]]
+! NSW: ^[[BODY]]:
+! NSW:   %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[ONE_1:.*]] = arith.constant 1 : i32
+! NSW:   %[[TRIP_VAR_NEXT:.*]] = arith.subi %[[TRIP_VAR]], %[[ONE_1]] : i32
+! NSW:   fir.store %[[TRIP_VAR_NEXT]] to %[[TRIP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[LOOP_VAR:.*]] = fir.load %[[LOOP_VAR_REF]] : !fir.ref<i32>
+! NSW:   %[[STEP_2:.*]] = arith.constant 2 : i32
+! NSW:   %[[LOOP_VAR_NEXT:.*]] = arith.addi %[[LOOP_VAR]], %[[STEP_2]] overflow<nsw> : i32
+! NSW:   fir.store %[[LOOP_VAR_NEXT]] to %[[LOOP_VAR_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER]]
+! NSW: ^[[EXIT]]:
+! NSW:   return
+
 ! Test a three nested unstructured loop. Three nesting is the basic case where
 ! we have loops that are neither innermost or outermost.
 subroutine nested_unstructured()
@@ -157,7 +215,7 @@ subroutine nested_unstructured()
 ! CHECK:   fir.store %[[TRIP_VAR_K_NEXT]] to %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
 ! CHECK:   %[[LOOP_VAR_K:.*]] = fir.load %[[LOOP_VAR_K_REF]] : !fir.ref<i32>
 ! CHECK:   %[[K_STEP_2:.*]] = arith.constant 1 : i32
-! CHECK:   %[[LOOP_VAR_K_NEXT:.*]] = arith.addi %[[LOOP_VAR_K]], %[[K_STEP_2]] overflow<nsw> : i32
+! CHECK:   %[[LOOP_VAR_K_NEXT:.*]] = arith.addi %[[LOOP_VAR_K]], %[[K_STEP_2]] : i32
 ! CHECK:   fir.store %[[LOOP_VAR_K_NEXT]] to %[[LOOP_VAR_K_REF]] : !fir.ref<i32>
 ! CHECK:   cf.br ^[[HEADER_K]]
 ! CHECK: ^[[EXIT_K]]:
@@ -167,7 +225,7 @@ subroutine nested_unstructured()
 ! CHECK:   fir.store %[[TRIP_VAR_J_NEXT]] to %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
 ! CHECK:   %[[LOOP_VAR_J:.*]] = fir.load %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
 ! CHECK:   %[[J_STEP_2:.*]] = arith.constant 1 : i32
-! CHECK:   %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %[[J_STEP_2]] overflow<nsw> : i32
+! CHECK:   %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %[[J_STEP_2]] : i32
 ! CHECK:   fir.store %[[LOOP_VAR_J_NEXT]] to %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
 ! CHECK:   cf.br ^[[HEADER_J]]
 ! CHECK: ^[[EXIT_J]]:
@@ -177,12 +235,96 @@ subroutine nested_unstructured()
 ! CHECK:   fir.store %[[TRIP_VAR_I_NEXT]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
 ! CHECK:   %[[LOOP_VAR_I:.*]] = fir.load %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
 ! CHECK:   %[[I_STEP_2:.*]] = arith.constant 1 : i32
-! CHECK:   %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] overflow<nsw> : i32
+! CHECK:   %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] : i32
 ! CHECK:   fir.store %[[LOOP_VAR_I_NEXT]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
 ! CHECK:   cf.br ^[[HEADER_I]]
 ! CHECK: ^[[EXIT_I]]:
 ! CHECK:   return
 
+! NSW-LABEL: nested_unstructured
+! NSW:   %[[TRIP_VAR_K_REF:.*]] = fir.alloca i32
+! NSW:   %[[TRIP_VAR_J_REF:.*]] = fir.alloca i32
+! NSW:   %[[TRIP_VAR_I_REF:.*]] = fir.alloca i32
+! NSW:   %[[LOOP_VAR_I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_unstructuredEi"}
+! NSW:   %[[LOOP_VAR_J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_unstructuredEj"}
+! NSW:   %[[LOOP_VAR_K_REF:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFnested_unstructuredEk"}
+! NSW:   %[[I_START:.*]] = arith.constant 1 : i32
+! NSW:   %[[I_END:.*]] = arith.constant 100 : i32
+! NSW:   %[[I_STEP:.*]] = arith.constant 1 : i32
+! NSW:   %[[TMP1:.*]] = arith.subi %[[I_END]], %[[I_START]] : i32
+! NSW:   %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[I_STEP]] : i32
+! NSW:   %[[TRIP_COUNT_I:.*]] = arith.divsi %[[TMP2]], %[[I_STEP]] : i32
+! NSW:   fir.store %[[TRIP_COUNT_I]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   fir.store %[[I_START]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER_I:.*]]
+! NSW: ^[[HEADER_I]]:
+! NSW:   %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[ZERO_1:.*]] = arith.constant 0 : i32
+! NSW:   %[[COND_I:.*]] = arith.cmpi sgt, %[[TRIP_VAR_I]], %[[ZERO_1]] : i32
+! NSW:   cf.cond_br %[[COND_I]], ^[[BODY_I:.*]], ^[[EXIT_I:.*]]
+! NSW: ^[[BODY_I]]:
+! NSW:   %[[J_START:.*]] = arith.constant 1 : i32
+! NSW:   %[[J_END:.*]] = arith.constant 200 : i32
+! NSW:   %[[J_STEP:.*]] = arith.constant 1 : i32
+! NSW:   %[[TMP3:.*]] = arith.subi %[[J_END]], %[[J_START]] : i32
+! NSW:   %[[TMP4:.*]] = arith.addi %[[TMP3]], %[[J_STEP]] : i32
+! NSW:   %[[TRIP_COUNT_J:.*]] = arith.divsi %[[TMP4]], %[[J_STEP]] : i32
+! NSW:   fir.store %[[TRIP_COUNT_J]] to %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   fir.store %[[J_START]] to %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER_J:.*]]
+! NSW: ^[[HEADER_J]]:
+! NSW:   %[[TRIP_VAR_J:.*]] = fir.load %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   %[[ZERO_2:.*]] = arith.constant 0 : i32
+! NSW:   %[[COND_J:.*]] = arith.cmpi sgt, %[[TRIP_VAR_J]], %[[ZERO_2]] : i32
+! NSW:   cf.cond_br %[[COND_J]], ^[[BODY_J:.*]], ^[[EXIT_J:.*]]
+! NSW: ^[[BODY_J]]:
+! NSW:   %[[K_START:.*]] = arith.constant 1 : i32
+! NSW:   %[[K_END:.*]] = arith.constant 300 : i32
+! NSW:   %[[K_STEP:.*]] = arith.constant 1 : i32
+! NSW:   %[[TMP3:.*]] = arith.subi %[[K_END]], %[[K_START]] : i32
+! NSW:   %[[TMP4:.*]] = arith.addi %[[TMP3]], %[[K_STEP]] : i32
+! NSW:   %[[TRIP_COUNT_K:.*]] = arith.divsi %[[TMP4]], %[[K_STEP]] : i32
+! NSW:   fir.store %[[TRIP_COUNT_K]] to %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   fir.store %[[K_START]] to %[[LOOP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER_K:.*]]
+! NSW: ^[[HEADER_K]]:
+! NSW:   %[[TRIP_VAR_K:.*]] = fir.load %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   %[[ZERO_2:.*]] = arith.constant 0 : i32
+! NSW:   %[[COND_K:.*]] = arith.cmpi sgt, %[[TRIP_VAR_K]], %[[ZERO_2]] : i32
+! NSW:   cf.cond_br %[[COND_K]], ^[[BODY_K:.*]], ^[[EXIT_K:.*]]
+! NSW: ^[[BODY_K]]:
+! NSW:   %[[TRIP_VAR_K:.*]] = fir.load %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   %[[ONE_1:.*]] = arith.constant 1 : i32
+! NSW:   %[[TRIP_VAR_K_NEXT:.*]] = arith.subi %[[TRIP_VAR_K]], %[[ONE_1]] : i32
+! NSW:   fir.store %[[TRIP_VAR_K_NEXT]] to %[[TRIP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   %[[LOOP_VAR_K:.*]] = fir.load %[[LOOP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   %[[K_STEP_2:.*]] = arith.constant 1 : i32
+! NSW:   %[[LOOP_VAR_K_NEXT:.*]] = arith.addi %[[LOOP_VAR_K]], %[[K_STEP_2]] overflow<nsw> : i32
+! NSW:   fir.store %[[LOOP_VAR_K_NEXT]] to %[[LOOP_VAR_K_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER_K]]
+! NSW: ^[[EXIT_K]]:
+! NSW:   %[[TRIP_VAR_J:.*]] = fir.load %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   %[[ONE_1:.*]] = arith.constant 1 : i32
+! NSW:   %[[TRIP_VAR_J_NEXT:.*]] = arith.subi %[[TRIP_VAR_J]], %[[ONE_1]] : i32
+! NSW:   fir.store %[[TRIP_VAR_J_NEXT]] to %[[TRIP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   %[[LOOP_VAR_J:.*]] = fir.load %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   %[[J_STEP_2:.*]] = arith.constant 1 : i32
+! NSW:   %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %[[J_STEP_2]] overflow<nsw> : i32
+! NSW:   fir.store %[[LOOP_VAR_J_NEXT]] to %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER_J]]
+! NSW: ^[[EXIT_J]]:
+! NSW:   %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[ONE_1:.*]] = arith.constant 1 : i32
+! NSW:   %[[TRIP_VAR_I_NEXT:.*]] = arith.subi %[[TRIP_VAR_I]], %[[ONE_1]] : i32
+! NSW:   fir.store %[[TRIP_VAR_I_NEXT]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[LOOP_VAR_I:.*]] = fir.load %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[I_STEP_2:.*]] = arith.constant 1 : i32
+! NSW:   %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] overflow<nsw> : i32
+! NSW:   fir.store %[[LOOP_VAR_I_NEXT]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER_I]]
+! NSW: ^[[EXIT_I]]:
+! NSW:   return
+
 ! Test the existence of a structured loop inside an unstructured loop.
 ! Only minimal checks are inserted for the structured loop.
 subroutine nested_structured_in_unstructured()
@@ -217,9 +359,9 @@ subroutine nested_structured_in_unstructured()
 ! CHECK-SAME: %{{.*}} to %{{.*}} step %[[ST:[^ ]*]]
 ! CHECK-SAME: iter_args(%[[J_IV:.*]] = %{{.*}}) -> (index, i32) {
 ! CHECK:     fir.store %[[J_IV]] to %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
-! CHECK:     %[[J_INDEX_NEXT:.*]] = arith.addi %[[J_INDEX]], %[[ST]] overflow<nsw> : index
+! CHECK:     %[[J_INDEX_NEXT:.*]] = arith.addi %[[J_INDEX]], %[[ST]] : index
 ! CHECK:     %[[LOOP_VAR_J:.*]] = fir.load %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
-! CHECK:     %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %{{[^ ]*}} overflow<nsw> : i32
+! CHECK:     %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %{{[^ ]*}} : i32
 ! CHECK:   }
 ! CHECK:   %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
 ! CHECK:   %[[C1_3:.*]] = arith.constant 1 : i32
@@ -227,8 +369,47 @@ subroutine nested_structured_in_unstructured()
 ! CHECK:   fir.store %[[TRIP_VAR_I_NEXT]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
 ! CHECK:   %[[LOOP_VAR_I:.*]] = fir.load %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
 ! CHECK:   %[[I_STEP_2:.*]] = arith.constant 1 : i32
-! CHECK:   %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] overflow<nsw> : i32
+! CHECK:   %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] : i32
 ! CHECK:   fir.store %[[LOOP_VAR_I_NEXT]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
 ! CHECK:   cf.br ^[[HEADER]]
 ! CHECK: ^[[EXIT]]:
 ! CHECK:   return
+
+! NSW-LABEL: nested_structured_in_unstructured
+! NSW:   %[[TRIP_VAR_I_REF:.*]] = fir.alloca i32
+! NSW:   %[[LOOP_VAR_I_REF:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFnested_structured_in_unstructuredEi"}
+! NSW:   %[[LOOP_VAR_J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFnested_structured_in_unstructuredEj"}
+! NSW:   %[[I_START:.*]] = arith.constant 1 : i32
+! NSW:   %[[I_END:.*]] = arith.constant 100 : i32
+! NSW:   %[[I_STEP:.*]] = arith.constant 1 : i32
+! NSW:   %[[TMP1:.*]] = arith.subi %[[I_END]], %[[I_START]] : i32
+! NSW:   %[[TMP2:.*]] = arith.addi %[[TMP1]], %[[I_STEP]] : i32
+! NSW:   %[[TRIP_COUNT:.*]] = arith.divsi %[[TMP2]], %[[I_STEP]] : i32
+! NSW:   fir.store %[[TRIP_COUNT]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   fir.store %[[I_START]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER:.*]]
+! NSW: ^[[HEADER]]:
+! NSW:   %[[TRIP_VAR:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[ZERO:.*]] = arith.constant 0 : i32
+! NSW:   %[[COND:.*]] = arith.cmpi sgt, %[[TRIP_VAR]], %[[ZERO]] : i32
+! NSW:   cf.cond_br %[[COND]], ^[[BODY:.*]], ^[[EXIT:.*]]
+! NSW: ^[[BODY]]:
+! NSW:   %{{.*}} = fir.do_loop %[[J_INDEX:[^ ]*]] =
+! NSW-SAME: %{{.*}} to %{{.*}} step %[[ST:[^ ]*]]
+! NSW-SAME: iter_args(%[[J_IV:.*]] = %{{.*}}) -> (index, i32) {
+! NSW:     fir.store %[[J_IV]] to %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:     %[[J_INDEX_NEXT:.*]] = arith.addi %[[J_INDEX]], %[[ST]] overflow<nsw> : index
+! NSW:     %[[LOOP_VAR_J:.*]] = fir.load %[[LOOP_VAR_J_REF]] : !fir.ref<i32>
+! NSW:     %[[LOOP_VAR_J_NEXT:.*]] = arith.addi %[[LOOP_VAR_J]], %{{[^ ]*}} overflow<nsw> : i32
+! NSW:   }
+! NSW:   %[[TRIP_VAR_I:.*]] = fir.load %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[C1_3:.*]] = arith.constant 1 : i32
+! NSW:   %[[TRIP_VAR_I_NEXT:.*]] = arith.subi %[[TRIP_VAR_I]], %[[C1_3]] : i32
+! NSW:   fir.store %[[TRIP_VAR_I_NEXT]] to %[[TRIP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[LOOP_VAR_I:.*]] = fir.load %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   %[[I_STEP_2:.*]] = arith.constant 1 : i32
+! NSW:   %[[LOOP_VAR_I_NEXT:.*]] = arith.addi %[[LOOP_VAR_I]], %[[I_STEP_2]] overflow<nsw> : i32
+! NSW:   fir.store %[[LOOP_VAR_I_NEXT]] to %[[LOOP_VAR_I_REF]] : !fir.ref<i32>
+! NSW:   cf.br ^[[HEADER]]
+! NSW: ^[[EXIT]]:
+! NSW:   return
diff --git a/flang/test/Lower/goto-do-body.f90 b/flang/test/Lower/goto-do-body.f90
index 89e4a7a64a87bac..910e55f1839fd2a 100644
--- a/flang/test/Lower/goto-do-body.f90
+++ b/flang/test/Lower/goto-do-body.f90
@@ -48,7 +48,7 @@ subroutine sub1()
 ! CHECK:    fir.store %[[TMP8]] to %[[TRIP]] : !fir.ref<i32>
 ! CHECK:    %[[TMP9:.*]] = fir.load %[[I]] : !fir.ref<i32>
 ! CHECK:    %[[C1_4:.*]] = arith.constant 1 : i32
-! CHECK:    %[[TMP10:.*]] = arith.addi %[[TMP9]], %[[C1_4]] overflow<nsw> : i32
+! CHECK:    %[[TMP10:.*]] = arith.addi %[[TMP9]], %[[C1_4]] : i32
 ! CHECK:    fir.store %[[TMP10]] to %[[I]] : !fir.ref<i32>
 ! CHECK:    cf.br ^[[HEADER]]
   end do
@@ -115,7 +115,7 @@ subroutine sub2()
 ! CHECK:    fir.store %[[TMP10]] to %[[TRIP]] : !fir.ref<i32>
 ! CHECK:    %[[TMP11:.*]] = fir.load %[[I]] : !fir.ref<i32>
 ! CHECK:    %[[STEP_VAL:.*]] = fir.load %[[STEP_VAR]] : !fir.ref<i32>
-! CHECK:    %[[TMP12:.*]] = arith.addi %[[TMP11]], %[[STEP_VAL]] overflow<nsw> : i32
+! CHECK:    %[[TMP12:.*]] = arith.addi %[[TMP11]], %[[STEP_VAL]] : i32
 ! CHECK:    fir.store %[[TMP12]] to %[[I]] : !fir.ref<i32>
 ! CHECK:    cf.br ^[[HEADER]]
   end do
diff --git a/flang/test/Lower/host-associated.f90 b/flang/test/Lower/host-associated.f90
index 33acdff1bb74cf6..9b4269df7bfcb67 100644
--- a/flang/test/Lower/host-associated.f90
+++ b/flang/test/Lower/host-associated.f90
@@ -1,5 +1,5 @@
 ! Test internal procedure host association lowering.
-! RUN: bbc -hlfir=false -fwrapv %s -o - | FileCheck %s
+! RUN: bbc -hlfir=false %s -o - | FileCheck %s
 
 ! -----------------------------------------------------------------------------
 !     Test non character intrinsic scalars
diff --git a/flang/test/Lower/infinite_loop.f90 b/flang/test/Lower/infinite_loop.f90
index de0bee779c5b623..6942dda8d7a23a1 100644
--- a/flang/test/Lower/infinite_loop.f90
+++ b/flang/test/Lower/infinite_loop.f90
@@ -1,11 +1,9 @@
 ! RUN: bbc -emit-fir -hlfir=false -o - %s | FileCheck %s
 ! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -o - %s | FileCheck %s
-! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -fwrapv -o - %s | FileCheck %s --check-prefix=NO-NSW
+! RUN: %flang_fc1 -emit-fir -flang-deprecated-no-hlfir -flang-experimental-integer-overflow -o - %s | FileCheck %s --check-prefix=NSW
 
 ! Tests for infinite loop.
 
-! NO-NSW-NOT: overflow<nsw>
-
 subroutine empty_infinite()
   do
   end do
@@ -98,10 +96,10 @@ subroutine structured_loop_in_infinite(i)
 ! CHECK-SAME: %[[C1_INDEX]] to %[[C10_INDEX]] step %[[C1_1]]
 ! CHECK-SAME: iter_args(%[[J_IV:.*]] = %[[J_LB]]) -> (index, i32) {
 ! CHECK:    fir.store %[[J_IV]] to %[[J_REF]] : !fir.ref<i32>
-! CHECK:    %[[J_NEXT:.*]] = arith.addi %[[J]], %[[C1_1]] overflow<nsw> : index
+! CHECK:    %[[J_NEXT:.*]] = arith.addi %[[J]], %[[C1_1]] : index
 ! CHECK:    %[[J_STEPCAST:.*]] = fir.convert %[[C1_1]] : (index) -> i32
 ! CHECK:    %[[J_IVLOAD:.*]] = fir.load %[[J_REF]] : !fir.ref<i32>
-! CHECK:    %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST]] overflow<nsw> : i32
+! CHECK:    %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST]] : i32
 ! CHECK:    fir.result %[[J_NEXT]], %[[J_IVINC]] : index, i32
 ! CHECK:  }
 ! CHECK:  fir.store %[[J_FINAL]]#1 to %[[J_REF]] : !fir.ref<i32>
@@ -109,6 +107,39 @@ subroutine structured_loop_in_infinite(i)
 ! CHECK: ^[[RETURN]]:
 ! CHECK:   return
 
+! NSW-LABEL: structured_loop_in_infinite
+! NSW-SAME: %[[I_REF:.*]]: !fir.ref<i32>
+! NSW:  %[[J_REF:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFstructured_loop_in_infiniteEj"}
+! NSW:  cf.br ^[[BODY1:.*]]
+! NSW: ^[[BODY1]]:
+! NSW:  %[[I:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
+! NSW:  %[[C100:.*]] = arith.constant 100 : i32
+! NSW:  %[[COND:.*]] = arith.cmpi sgt, %[[I]], %[[C100]] : i32
+! NSW:  cf.cond_br %[[COND]], ^[[EXIT:.*]], ^[[BODY2:.*]]
+! NSW: ^[[EXIT]]:
+! NSW:  cf.br ^[[RETURN:.*]]
+! NSW: ^[[BODY2]]:
+! NSW:  %[[C1:.*]] = arith.constant 1 : i32
+! NSW:  %[[C1_INDEX:.*]] = fir.convert %[[C1]] : (i32) -> index
+! NSW:  %[[C10:.*]] = arith.constant 10 : i32
+! NSW:  %[[C10_INDEX:.*]] = fir.convert %[[C10]] : (i32) -> index
+! NSW:  %[[C1_1:.*]] = arith.constant 1 : index
+! NSW:  %[[J_LB:.*]] = fir.convert %[[C1_INDEX]] : (index) -> i32
+! NSW:  %[[J_FINAL:.*]]:2 = fir.do_loop %[[J:[^ ]*]] =
+! NSW-SAME: %[[C1_INDEX]] to %[[C10_INDEX]] step %[[C1_1]]
+! NSW-SAME: iter_args(%[[J_IV:.*]] = %[[J_LB]]) -> (index, i32) {
+! NSW:    fir.store %[[J_IV]] to %[[J_REF]] : !fir.ref<i32>
+! NSW:    %[[J_NEXT:.*]] = arith.addi %[[J]], %[[C1_1]] overflow<nsw> : index
+! NSW:    %[[J_STEPCAST:.*]] = fir.convert %[[C1_1]] : (index) -> i32
+! NSW:    %[[J_IVLOAD:.*]] = fir.load %[[J_REF]] : !fir.ref<i32>
+! NSW:    %[[J_IVINC:.*]] = arith.addi %[[J_IVLOAD]], %[[J_STEPCAST]] overflow<nsw> : i32
+! NSW:    fir.result %[[J_NEXT]], %[[J_IVINC]] : index, i32
+! NSW:  }
+! NSW:  fir.store %[[J_FINAL]]#1 to %[[J_REF]] : !fir.ref<i32>
+! NSW:  cf.br ^[[BODY1]]
+! NSW: ^[[RETURN]]:
+! NSW:   return
+
 subroutine empty_infinite_in_while(i)
   integer :: i
   do while (i .gt. 50)
diff --git a/flang/test/Lower/io-implied-do-fixes.f90 b/flang/test/Lower/io-implied-do-fixes.f90
index cd4fd43e051946e..a6c115fa80ded17 100644
--- a/flang/test/Lower/io-implied-do-fixes.f90
+++ b/flang/test/Lower/io-implied-do-fixes.f90
@@ -1,20 +1,30 @@
 ! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false %s -o - | FileCheck %s
-! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false -fwrapv %s -o - | FileCheck %s --check-prefix=NO-NSW
+! RUN: bbc --use-desc-for-alloc=false -emit-fir -hlfir=false -integer-overflow %s -o - | FileCheck %s --check-prefix=NSW
 ! UNSUPPORTED: system-windows
 
-! NO-NSW-NOT: overflow<nsw>
-
 ! CHECK-LABEL: func @_QPido1
 ! CHECK: %[[J_REF_ADDR:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFido1Eiptr.addr"}
 ! CHECK: %[[J_ADDR:.*]] = fir.load %[[J_REF_ADDR]] : !fir.ref<!fir.ptr<i32>>
 ! CHECK: %[[J_VAL_FINAL:.*]] = fir.do_loop %[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index {
 ! CHECK:   %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32
 ! CHECK:   fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.ptr<i32>
-! CHECK:   %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow<nsw> : index
+! CHECK:   %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} : index
 ! CHECK:   fir.result %[[J_VAL_NEXT]] : index
 ! CHECK: }
 ! CHECK: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]] : (index) -> i32
 ! CHECK: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.ptr<i32>
+
+! NSW-LABEL: func @_QPido1
+! NSW: %[[J_REF_ADDR:.*]] = fir.alloca !fir.ptr<i32> {uniq_name = "_QFido1Eiptr.addr"}
+! NSW: %[[J_ADDR:.*]] = fir.load %[[J_REF_ADDR]] : !fir.ref<!fir.ptr<i32>>
+! NSW: %[[J_VAL_FINAL:.*]] = fir.do_loop %[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index {
+! NSW:   %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32
+! NSW:   fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.ptr<i32>
+! NSW:   %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow<nsw> : index
+! NSW:   fir.result %[[J_VAL_NEXT]] : index
+! NSW: }
+! NSW: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]] : (index) -> i32
+! NSW: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.ptr<i32>
 subroutine ido1
   integer, pointer :: iptr
   integer, target :: itgt
@@ -28,11 +38,23 @@ subroutine ido1
 ! CHECK: %[[J_VAL_FINAL:.*]] = fir.do_loop %[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index {
 ! CHECK: %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32
 ! CHECK: fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.heap<i32>
-! CHECK: %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow<nsw> : index
+! CHECK: %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} : index
 ! CHECK: fir.result %[[J_VAL_NEXT]] : index
 ! CHECK: }
 ! CHECK: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]] : (index) -> i32
 ! CHECK: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.heap<i32>
+
+! NSW-LABEL: func @_QPido2
+! NSW: %[[J_REF_ADDR:.*]] = fir.alloca !fir.heap<i32> {uniq_name = "_QFido2Eiptr.addr"}
+! NSW: %[[J_ADDR:.*]] = fir.load %[[J_REF_ADDR]] : !fir.ref<!fir.heap<i32>>
+! NSW: %[[J_VAL_FINAL:.*]] = fir.do_loop %[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}} -> index {
+! NSW: %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32
+! NSW: fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.heap<i32>
+! NSW: %[[J_VAL_NEXT:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow<nsw> : index
+! NSW: fir.result %[[J_VAL_NEXT]] : index
+! NSW: }
+! NSW: %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]] : (index) -> i32
+! NSW: fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.heap<i32>
 subroutine ido2
   integer, allocatable :: iptr
   allocate(iptr)
@@ -47,12 +69,27 @@ subroutine ido2
 ! CHECK:    fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.heap<i32>
 ! CHECK:    %[[RES:.*]] = fir.if %[[OK]] -> (i1) {
 ! CHECK:    }
-! CHECK:   %[[J_VAL_INC:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow<nsw> : index
+! CHECK:   %[[J_VAL_INC:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} : index
 ! CHECK:   %[[J_VAL_NEXT:.*]] = arith.select %[[RES]], %[[J_VAL_INC]], %[[J_VAL]] : index
 ! CHECK:   fir.result %[[J_VAL_NEXT]], %[[RES]] : index, i1
 ! CHECK:  }
 ! CHECK:  %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]]#0 : (index) -> i32
 ! CHECK:  fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.heap<i32
+
+! NSW-LABEL: func @_QPido3
+! NSW:  %[[J_REF_ADDR:.*]] = fir.alloca !fir.heap<i32> {uniq_name = "_QFido3Ej.addr"}
+! NSW:  %[[J_ADDR:.*]] = fir.load %[[J_REF_ADDR]] : !fir.ref<!fir.heap<i32>>
+! NSW:  %[[J_VAL_FINAL:.*]]:2 = fir.iterate_while (%[[J_VAL:.*]] = %{{.*}} to %{{.*}} step %{{.*}}) and (%[[OK:.*]] = {{.*}}) -> (index, i1) {
+! NSW:    %[[J_VAL_CVT1:.*]] = fir.convert %[[J_VAL]] : (index) -> i32
+! NSW:    fir.store %[[J_VAL_CVT1]] to %[[J_ADDR]] : !fir.heap<i32>
+! NSW:    %[[RES:.*]] = fir.if %[[OK]] -> (i1) {
+! NSW:    }
+! NSW:   %[[J_VAL_INC:.*]] = arith.addi %[[J_VAL]], %{{[^ ]*}} overflow<nsw> : index
+! NSW:   %[[J_VAL_NEXT:.*]] = arith.select %[[RES]], %[[J_VAL_INC]], %[[J_VAL]] : index
+! NSW:   fir.result %[[J_VAL_NEXT]], %[[RES]] : index, i1
+! NSW:  }
+! NSW:  %[[J_VAL_CVT2:.*]] = fir.convert %[[J_VAL_FINAL]]#0 : (index) -> i32
+! NSW:  fir.store %[[J_VAL_CVT2]] to %[[J_ADDR]] : !fir.heap<i32>
 subroutine ido3
   integer, allocatable :: j
   allocate(j)
diff --git a/flang/test/Lower/loops2.f90 b/flang/test/Lower/loops2.f90
index 60a6bf6c5311977..0a587234a991b69 100644
--- a/flang/test/Lower/loops2.f90
+++ b/flang/test/Lower/loops2.f90
@@ -107,7 +107,7 @@ subroutine test_pointer_unstructured_loop()
 ! CHECK:       ^bb4:
 ! CHECK:         %[[VAL_20:.*]] = fir.load %[[VAL_3]] : !fir.ptr<i32>
 ! CHECK:         %[[VAL_21:.*]] = arith.constant 1 : i32
-! CHECK:         %[[VAL_22:.*]] = arith.addi %[[VAL_20]], %[[VAL_21]] overflow<nsw> : i32
+! CHECK:         %[[VAL_22:.*]] = arith.addi %[[VAL_20]], %[[VAL_21]] : i32
 ! CHECK:         fir.store %[[VAL_22]] to %[[VAL_3]] : !fir.ptr<i32>
 ! CHECK:         br ^bb1
 ! CHECK:       ^bb5:
diff --git a/flang/test/Lower/mixed_loops.f90 b/flang/test/Lower/mixed_loops.f90
index 991fd7aa82bb955..1aa0225129bed62 100644
--- a/flang/test/Lower/mixed_loops.f90
+++ b/flang/test/Lower/mixed_loops.f90
@@ -53,7 +53,7 @@ subroutine while_inside_do_loop
   ! CHECK: fir.store %[[TDEC]] to %[[T_REF]]
   ! CHECK: %[[I3:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
   ! CHECK: %[[C1_2:.*]] = arith.constant 1 : i32
-  ! CHECK: %[[IINC:.*]] = arith.addi %[[I3]], %[[C1_2]] overflow<nsw> : i32
+  ! CHECK: %[[IINC:.*]] = arith.addi %[[I3]], %[[C1_2]] : i32
   ! CHECK: fir.store %[[IINC]] to %[[I_REF]] : !fir.ref<i32>
   ! CHECK: br ^[[HDR1]]
   end do
@@ -100,10 +100,10 @@ subroutine do_inside_while_loop
         ! CHECK-DAG: %[[C2:.*]] = arith.constant 2 : i32
         ! CHECK: %[[JINC:.*]] = arith.muli %[[C2]], %[[J2]] : i32
         ! CHECK: fir.store %[[JINC]] to %[[J_REF]] : !fir.ref<i32>
-        ! CHECK: %[[IINC:.*]] = arith.addi %[[IDX]], %[[C1]] overflow<nsw> : index
+        ! CHECK: %[[IINC:.*]] = arith.addi %[[IDX]], %[[C1]] : index
         ! CHECK: %[[I_STEPCAST:.*]] = fir.convert %[[C1]] : (index) -> i32
         ! CHECK: %[[I_IVLOAD:.*]] = fir.load %[[I_REF]] : !fir.ref<i32>
-        ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] overflow<nsw> : i32
+        ! CHECK: %[[I_IVINC:.*]] = arith.addi %[[I_IVLOAD]], %[[I_STEPCAST]] : i32
         ! CHECK: fir.result %[[IINC]], %[[I_IVINC]] : index, i32
       do i=8,13
         j=j*2
diff --git a/flang/test/Lower/vector-subscript-io.f90 b/flang/test/Lower/vector-subscript-io.f90
index 372130fd099074f..129e3ee1206c09e 100644
--- a/flang/test/Lower/vector-subscript-io.f90
+++ b/flang/test/Lower/vector-subscript-io.f90
@@ -30,7 +30,7 @@ subroutine simple(x, y)
 ! CHECK:   %[[VAL_19:.*]] = fir.array_coor %[[VAL_20]](%[[VAL_10]]) {{\[}}%[[VAL_11]]] %[[VAL_18]] : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>, !fir.slice<1>, index) -> !fir.ref<i32>
 ! CHECK:   %[[VAL_21:.*]] = fir.convert %[[VAL_19]] : (!fir.ref<i32>) -> !fir.ref<i64>
 ! CHECK:   %[[VAL_22:.*]] = fir.call @_FortranAioInputInteger(%[[VAL_9]], %[[VAL_21]], %[[VAL_3]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i64>, i32) -> i1
-! CHECK:   %[[VAL_23:.*]] = arith.addi %[[VAL_12]], %[[VAL_6]] overflow<nsw> : index
+! CHECK:   %[[VAL_23:.*]] = arith.addi %[[VAL_12]], %[[VAL_6]] : index
 ! CHECK:   %[[VAL_24:.*]] = arith.subi %[[VAL_13]], %[[VAL_6]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_23]], %[[VAL_24]] : index, index)
 ! CHECK: ^bb3:
@@ -79,7 +79,7 @@ integer function get_substcript()
 ! CHECK:   %[[VAL_49:.*]] = fir.convert %[[VAL_48]] : (i32) -> index
 ! CHECK:   %[[VAL_50:.*]] = fir.array_coor %[[VAL_51]] {{\[}}%[[VAL_42]]] %[[VAL_46]], %[[VAL_49]] : (!fir.box<!fir.array<?x?xf32>>, !fir.slice<2>, index, index) -> !fir.ref<f32>
 ! CHECK:   %[[VAL_52:.*]] = fir.call @_FortranAioInputReal32(%[[VAL_34]], %[[VAL_50]]) {{.*}}: (!fir.ref<i8>, !fir.ref<f32>) -> i1
-! CHECK:   %[[VAL_53:.*]] = arith.addi %[[VAL_43]], %[[VAL_30]] overflow<nsw> : index
+! CHECK:   %[[VAL_53:.*]] = arith.addi %[[VAL_43]], %[[VAL_30]] : index
 ! CHECK:   %[[VAL_54:.*]] = arith.subi %[[VAL_44]], %[[VAL_30]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_53]], %[[VAL_54]] : index, index)
 ! CHECK: ^bb3:
@@ -122,7 +122,7 @@ subroutine with_assumed_shapes(x, y)
 ! CHECK:   %[[VAL_77:.*]] = fir.array_coor %[[VAL_78]] {{\[}}%[[VAL_70]]] %[[VAL_76]] : (!fir.box<!fir.array<?xi32>>, !fir.slice<1>, index) -> !fir.ref<i32>
 ! CHECK:   %[[VAL_79:.*]] = fir.convert %[[VAL_77]] : (!fir.ref<i32>) -> !fir.ref<i64>
 ! CHECK:   %[[VAL_80:.*]] = fir.call @_FortranAioInputInteger(%[[VAL_67]], %[[VAL_79]], %[[VAL_62]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i64>, i32) -> i1
-! CHECK:   %[[VAL_81:.*]] = arith.addi %[[VAL_71]], %[[VAL_64]] overflow<nsw> : index
+! CHECK:   %[[VAL_81:.*]] = arith.addi %[[VAL_71]], %[[VAL_64]] : index
 ! CHECK:   %[[VAL_82:.*]] = arith.subi %[[VAL_72]], %[[VAL_64]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_81]], %[[VAL_82]] : index, index)
 ! CHECK: ^bb3:
@@ -162,7 +162,7 @@ subroutine lower_bounds(x, y)
 ! CHECK:   %[[VAL_107:.*]] = fir.array_coor %[[VAL_108]](%[[VAL_97]]) {{\[}}%[[VAL_99]]] %[[VAL_91]], %[[VAL_106]] : (!fir.ref<!fir.array<4x6xi32>>, !fir.shapeshift<2>, !fir.slice<2>, index, index) -> !fir.ref<i32>
 ! CHECK:   %[[VAL_109:.*]] = fir.convert %[[VAL_107]] : (!fir.ref<i32>) -> !fir.ref<i64>
 ! CHECK:   %[[VAL_110:.*]] = fir.call @_FortranAioInputInteger(%[[VAL_96]], %[[VAL_109]], %[[VAL_90]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i64>, i32) -> i1
-! CHECK:   %[[VAL_111:.*]] = arith.addi %[[VAL_100]], %[[VAL_93]] overflow<nsw> : index
+! CHECK:   %[[VAL_111:.*]] = arith.addi %[[VAL_100]], %[[VAL_93]] : index
 ! CHECK:   %[[VAL_112:.*]] = arith.subi %[[VAL_101]], %[[VAL_93]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_111]], %[[VAL_112]] : index, index)
 ! CHECK: ^bb3:
@@ -202,11 +202,11 @@ subroutine two_vectors(x, y1, y2)
 ! CHECK:   %[[VAL_138:.*]] = fir.convert %[[VAL_137]] : (i32) -> index
 ! CHECK:   %[[VAL_139:.*]] = fir.array_coor %[[VAL_140]](%[[VAL_123]]) {{\[}}%[[VAL_124]]] %[[VAL_134]], %[[VAL_138]] : (!fir.ref<!fir.array<4x4xf32>>, !fir.shape<2>, !fir.slice<2>, index, index) -> !fir.ref<f32>
 ! CHECK:   %[[VAL_141:.*]] = fir.call @_FortranAioInputReal32(%[[VAL_122]], %[[VAL_139]]) {{.*}}: (!fir.ref<i8>, !fir.ref<f32>) -> i1
-! CHECK:   %[[VAL_142:.*]] = arith.addi %[[VAL_128]], %[[VAL_119]] overflow<nsw> : index
+! CHECK:   %[[VAL_142:.*]] = arith.addi %[[VAL_128]], %[[VAL_119]] : index
 ! CHECK:   %[[VAL_143:.*]] = arith.subi %[[VAL_129]], %[[VAL_119]] : index
 ! CHECK:   cf.br ^bb2(%[[VAL_142]], %[[VAL_143]] : index, index)
 ! CHECK: ^bb4:
-! CHECK:   %[[VAL_144:.*]] = arith.addi %[[VAL_125]], %[[VAL_119]] overflow<nsw> : index
+! CHECK:   %[[VAL_144:.*]] = arith.addi %[[VAL_125]], %[[VAL_119]] : index
 ! CHECK:   %[[VAL_145:.*]] = arith.subi %[[VAL_126]], %[[VAL_119]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_144]], %[[VAL_145]] : index, index)
 ! CHECK: ^bb5:
@@ -245,11 +245,11 @@ subroutine triplets_and_vector(x, y)
 ! CHECK:   %[[VAL_169:.*]] = fir.array_coor %[[VAL_170]](%[[VAL_157]]) {{\[}}%[[VAL_158]]] %[[VAL_162]], %[[VAL_168]] : (!fir.ref<!fir.array<4x4xcomplex<f32>>>, !fir.shape<2>, !fir.slice<2>, index, index) -> !fir.ref<complex<f32>>
 ! CHECK:   %[[VAL_171:.*]] = fir.convert %[[VAL_169]] : (!fir.ref<complex<f32>>) -> !fir.ref<f32>
 ! CHECK:   %[[VAL_172:.*]] = fir.call @_FortranAioInputComplex32(%[[VAL_156]], %[[VAL_171]]) {{.*}}: (!fir.ref<i8>, !fir.ref<f32>) -> i1
-! CHECK:   %[[VAL_173:.*]] = arith.addi %[[VAL_162]], %[[VAL_153]] overflow<nsw> : index
+! CHECK:   %[[VAL_173:.*]] = arith.addi %[[VAL_162]], %[[VAL_153]] : index
 ! CHECK:   %[[VAL_174:.*]] = arith.subi %[[VAL_163]], %[[VAL_153]] : index
 ! CHECK:   cf.br ^bb2(%[[VAL_173]], %[[VAL_174]] : index, index)
 ! CHECK: ^bb4:
-! CHECK:   %[[VAL_175:.*]] = arith.addi %[[VAL_159]], %[[VAL_153]] overflow<nsw> : index
+! CHECK:   %[[VAL_175:.*]] = arith.addi %[[VAL_159]], %[[VAL_153]] : index
 ! CHECK:   %[[VAL_176:.*]] = arith.subi %[[VAL_160]], %[[VAL_153]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_175]], %[[VAL_176]] : index, index)
 ! CHECK: ^bb5:
@@ -287,7 +287,7 @@ subroutine simple_char(x, y)
 ! CHECK:   %[[VAL_200:.*]] = fir.convert %[[VAL_199]] : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
 ! CHECK:   %[[VAL_201:.*]] = fir.convert %[[VAL_184]]#1 : (index) -> i64
 ! CHECK:   %[[VAL_202:.*]] = fir.call @_FortranAioInputAscii(%[[VAL_189]], %[[VAL_200]], %[[VAL_201]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i8>, i64) -> i1
-! CHECK:   %[[VAL_203:.*]] = arith.addi %[[VAL_192]], %[[VAL_183]] overflow<nsw> : index
+! CHECK:   %[[VAL_203:.*]] = arith.addi %[[VAL_192]], %[[VAL_183]] : index
 ! CHECK:   %[[VAL_204:.*]] = arith.subi %[[VAL_193]], %[[VAL_183]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_203]], %[[VAL_204]] : index, index)
 ! CHECK: ^bb3:
@@ -333,7 +333,7 @@ subroutine substring(x, y, i, j)
 ! CHECK:   %[[VAL_238:.*]] = fir.convert %[[VAL_233]] : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
 ! CHECK:   %[[VAL_239:.*]] = fir.convert %[[VAL_237]] : (index) -> i64
 ! CHECK:   %[[VAL_240:.*]] = fir.call @_FortranAioInputAscii(%[[VAL_213]], %[[VAL_238]], %[[VAL_239]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i8>, i64) -> i1
-! CHECK:   %[[VAL_241:.*]] = arith.addi %[[VAL_221]], %[[VAL_210]] overflow<nsw> : index
+! CHECK:   %[[VAL_241:.*]] = arith.addi %[[VAL_221]], %[[VAL_210]] : index
 ! CHECK:   %[[VAL_242:.*]] = arith.subi %[[VAL_222]], %[[VAL_210]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_241]], %[[VAL_242]] : index, index)
 ! CHECK: ^bb3:
@@ -366,7 +366,7 @@ subroutine complex_part(z, y)
 ! CHECK:   %[[VAL_260:.*]] = fir.convert %[[VAL_259]] : (i32) -> index
 ! CHECK:   %[[VAL_261:.*]] = fir.array_coor %[[VAL_262]] {{\[}}%[[VAL_254]]] %[[VAL_260]] : (!fir.box<!fir.array<?xcomplex<f32>>>, !fir.slice<1>, index) -> !fir.ref<f32>
 ! CHECK:   %[[VAL_263:.*]] = fir.call @_FortranAioInputReal32(%[[VAL_251]], %[[VAL_261]]) {{.*}}: (!fir.ref<i8>, !fir.ref<f32>) -> i1
-! CHECK:   %[[VAL_264:.*]] = arith.addi %[[VAL_255]], %[[VAL_248]] overflow<nsw> : index
+! CHECK:   %[[VAL_264:.*]] = arith.addi %[[VAL_255]], %[[VAL_248]] : index
 ! CHECK:   %[[VAL_265:.*]] = arith.subi %[[VAL_256]], %[[VAL_248]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_264]], %[[VAL_265]] : index, index)
 ! CHECK: ^bb3:
@@ -414,7 +414,7 @@ subroutine simple_derived(x, y)
 ! CHECK:   %[[VAL_288:.*]] = fir.embox %[[VAL_286]] : (!fir.ref<!fir.type<_QMderived_typesTt{i:i32,c:!fir.char<1,2>}>>) -> !fir.box<!fir.type<_QMderived_typesTt{i:i32,c:!fir.char<1,2>}>>
 ! CHECK:   %[[VAL_289:.*]] = fir.convert %[[VAL_288]] : (!fir.box<!fir.type<_QMderived_typesTt{i:i32,c:!fir.char<1,2>}>>) -> !fir.box<none>
 ! CHECK:   %[[VAL_290:.*]] = fir.call @_FortranAioInputDerivedType(%[[VAL_276]], %[[VAL_289]], {{.*}}) {{.*}}: (!fir.ref<i8>, !fir.box<none>, !fir.ref<none>) -> i1
-! CHECK:   %[[VAL_291:.*]] = arith.addi %[[VAL_279]], %[[VAL_273]] overflow<nsw> : index
+! CHECK:   %[[VAL_291:.*]] = arith.addi %[[VAL_279]], %[[VAL_273]] : index
 ! CHECK:   %[[VAL_292:.*]] = arith.subi %[[VAL_280]], %[[VAL_273]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_291]], %[[VAL_292]] : index, index)
 ! CHECK: ^bb3:
@@ -463,11 +463,11 @@ subroutine with_path(b, i)
 ! CHECK:   %[[VAL_325:.*]] = fir.array_coor %[[VAL_326:.*]](%[[VAL_313]]) {{\[}}%[[VAL_315]]] %[[VAL_301]], %[[VAL_324]], %[[VAL_316]] : (!fir.box<!fir.array<?x?x?x!fir.type<_QMderived_typesTt2{a:!fir.array<5x5x!fir.type<_QMderived_typesTt{i:i32,c:!fir.char<1,2>}>>}>>>, !fir.shift<3>, !fir.slice<3>, index, index, index) -> !fir.ref<i32>
 ! CHECK:   %[[VAL_327:.*]] = fir.convert %[[VAL_325]] : (!fir.ref<i32>) -> !fir.ref<i64>
 ! CHECK:   %[[VAL_328:.*]] = fir.call @_FortranAioInputInteger(%[[VAL_308]], %[[VAL_327]], %[[VAL_302]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i64>, i32) -> i1
-! CHECK:   %[[VAL_329:.*]] = arith.addi %[[VAL_319]], %[[VAL_305]] overflow<nsw> : index
+! CHECK:   %[[VAL_329:.*]] = arith.addi %[[VAL_319]], %[[VAL_305]] : index
 ! CHECK:   %[[VAL_330:.*]] = arith.subi %[[VAL_320]], %[[VAL_305]] : index
 ! CHECK:   cf.br ^bb2(%[[VAL_329]], %[[VAL_330]] : index, index)
 ! CHECK: ^bb4:
-! CHECK:   %[[VAL_331:.*]] = arith.addi %[[VAL_316]], %[[VAL_305]] overflow<nsw> : index
+! CHECK:   %[[VAL_331:.*]] = arith.addi %[[VAL_316]], %[[VAL_305]] : index
 ! CHECK:   %[[VAL_332:.*]] = arith.subi %[[VAL_317]], %[[VAL_305]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_331]], %[[VAL_332]] : index, index)
 ! CHECK: ^bb5:
@@ -505,7 +505,7 @@ subroutine simple_iostat(x, y, j, stat)
 ! CHECK:   %[[VAL_355:.*]] = fir.convert %[[VAL_354]] : (i32) -> index
 ! CHECK:   %[[VAL_356:.*]] = fir.array_coor %[[VAL_357]] {{\[}}%[[VAL_347]]] %[[VAL_355]] : (!fir.box<!fir.array<?xf32>>, !fir.slice<1>, index) -> !fir.ref<f32>
 ! CHECK:   %[[VAL_358:.*]] = fir.call @_FortranAioInputReal32(%[[VAL_343]], %[[VAL_356]]) {{.*}}: (!fir.ref<i8>, !fir.ref<f32>) -> i1
-! CHECK:   %[[VAL_359:.*]] = arith.addi %[[VAL_349]], %[[VAL_338]] overflow<nsw> : index
+! CHECK:   %[[VAL_359:.*]] = arith.addi %[[VAL_349]], %[[VAL_338]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_359]], %[[VAL_358]] : index, i1)
 ! CHECK: ^bb3:
 ! CHECK:   cf.cond_br %[[VAL_350]], ^bb4, ^bb5
@@ -568,10 +568,10 @@ subroutine iostat_in_io_loop(k, j, stat)
 ! CHECK:   %[[VAL_399:.*]] = fir.array_coor %[[VAL_400]](%[[VAL_387]]) {{\[}}%[[VAL_389]]] %[[VAL_394]], %[[VAL_398]] : (!fir.ref<!fir.array<3x5xi32>>, !fir.shape<2>, !fir.slice<2>, index, index) -> !fir.ref<i32>
 ! CHECK:   %[[VAL_401:.*]] = fir.convert %[[VAL_399]] : (!fir.ref<i32>) -> !fir.ref<i64>
 ! CHECK:   %[[VAL_402:.*]] = fir.call @_FortranAioInputInteger(%[[VAL_378]], %[[VAL_401]], %[[VAL_374]]) {{.*}}: (!fir.ref<i8>, !fir.ref<i64>, i32) -> i1
-! CHECK:   %[[VAL_403:.*]] = arith.addi %[[VAL_390]], %[[VAL_371]] overflow<nsw> : index
+! CHECK:   %[[VAL_403:.*]] = arith.addi %[[VAL_390]], %[[VAL_371]] : index
 ! CHECK:   cf.br ^bb4(%[[VAL_403]], %[[VAL_402]] : index, i1)
 ! CHECK: ^bb6(%[[VAL_404:.*]]: i1):
-! CHECK:   %[[VAL_405:.*]] = arith.addi %[[VAL_380]], %[[VAL_371]] overflow<nsw> : index
+! CHECK:   %[[VAL_405:.*]] = arith.addi %[[VAL_380]], %[[VAL_371]] : index
 ! CHECK:   cf.br ^bb1(%[[VAL_405]], %[[VAL_404]] : index, i1)
 ! CHECK: ^bb7:
 ! CHECK:   %[[VAL_406:.*]] = fir.convert %[[VAL_380]] : (index) -> i32
diff --git a/flang/test/Parser/OpenMP/master-unparse.f90 b/flang/test/Parser/OpenMP/master-unparse.f90
new file mode 100644
index 000000000000000..30c293a521b5d1e
--- /dev/null
+++ b/flang/test/Parser/OpenMP/master-unparse.f90
@@ -0,0 +1,73 @@
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+! Check parsing and unparsing of the master directive and its combined/composite variants
+
+
+subroutine test_master()
+  integer :: c = 1
+  !PARSE-TREE: OmpBeginBlockDirective
+  !PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = master
+  !CHECK: !$omp master
+  !$omp master 
+  c = c + 1
+  !$omp end master
+end subroutine
+
+subroutine test_master_taskloop_simd()
+  integer :: i, j = 1
+  !PARSE-TREE: OmpBeginLoopDirective
+  !PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = master taskloop simd
+  !CHECK: !$omp master taskloop simd
+  !$omp master taskloop simd 
+  do i=1,10
+   j = j + 1
+  end do
+  !$omp end master taskloop simd
+end subroutine
+
+subroutine test_master_taskloop
+  integer :: i, j = 1
+  !PARSE-TREE: OmpBeginLoopDirective
+  !PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = master taskloop
+  !CHECK: !$omp master taskloop
+  !$omp master taskloop
+  do i=1,10
+   j = j + 1
+  end do
+  !$omp end master taskloop 
+end subroutine
+
+subroutine test_parallel_master
+  integer :: c = 2
+  !PARSE-TREE: OmpBeginBlockDirective
+  !PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = parallel master
+  !CHECK: !$omp parallel master
+  !$omp parallel master
+  c = c + 2
+  !$omp end parallel master
+end subroutine
+
+subroutine test_parallel_master_taskloop_simd
+  integer :: i, j = 1
+  !PARSE-TREE: OmpBeginLoopDirective
+  !PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = parallel master taskloop simd
+  !CHECK: !$omp parallel master taskloop simd
+  !$omp parallel master taskloop simd 
+  do i=1,10
+   j = j + 1
+  end do
+  !$omp end parallel master taskloop simd
+end subroutine
+
+subroutine test_parallel_master_taskloop
+  integer :: i, j = 1
+  !PARSE-TREE: OmpBeginLoopDirective
+  !PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = parallel master taskloop
+  !CHECK: !$omp parallel master taskloop
+  !$omp parallel master taskloop
+  do i=1,10
+   j = j + 1
+  end do
+  !$omp end parallel master taskloop 
+end subroutine
diff --git a/flang/test/Parser/OpenMP/scope.f90 b/flang/test/Parser/OpenMP/scope.f90
new file mode 100644
index 000000000000000..6574136311e7187
--- /dev/null
+++ b/flang/test/Parser/OpenMP/scope.f90
@@ -0,0 +1,24 @@
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=51 %s | FileCheck --ignore-case %s
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=51 %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+program omp_scope
+  integer i
+  i = 10
+
+!CHECK: !$OMP SCOPE  PRIVATE(i)
+!CHECK: !$OMP END SCOPE
+
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct
+!PARSE-TREE: OmpBeginBlockDirective
+!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = scope
+!PARSE-TREE: OmpClauseList -> OmpClause -> Private -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'i'
+!PARSE-TREE: Block
+!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> PrintStmt
+!PARSE-TREE: OmpEndBlockDirective
+!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = scope
+!PARSE-TREE: OmpClauseList -> OmpClause -> Nowait
+
+  !$omp scope private(i)
+  print *, "omp scope", i
+  !$omp end scope nowait
+end program omp_scope
diff --git a/flang/test/Parser/OpenMP/taskloop.f90 b/flang/test/Parser/OpenMP/taskloop.f90
new file mode 100644
index 000000000000000..a9c361046bd5f5d
--- /dev/null
+++ b/flang/test/Parser/OpenMP/taskloop.f90
@@ -0,0 +1,41 @@
+! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s
+! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+subroutine parallel_work
+  integer :: i
+
+!CHECK: !$OMP TASKLOOP  GRAINSIZE(STRICT:500_4)
+!PARSE-TREE: OmpBeginLoopDirective
+!PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = taskloop
+!PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> Grainsize -> OmpGrainsizeClause
+!PARSE-TREE-NEXT: Prescriptiveness = Strict
+!PARSE-TREE-NEXT: Scalar -> Integer -> Expr = '500_4'
+  !$omp taskloop grainsize(strict: 500)
+  do i=1,10000
+    call loop_body(i)
+  end do
+  !$omp end taskloop
+
+!CHECK: !$OMP TASKLOOP  GRAINSIZE(500_4)
+!PARSE-TREE: OmpBeginLoopDirective
+!PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = taskloop
+!PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> Grainsize -> OmpGrainsizeClause
+!PARSE-TREE-NEXT: Scalar -> Integer -> Expr = '500_4'
+  !$omp taskloop grainsize(500)
+  do i=1,10000
+    call loop_body(i)
+  end do
+  !$omp end taskloop
+
+!CHECK: !$OMP TASKLOOP  NUM_TASKS(STRICT:500_4)
+!PARSE-TREE: OmpBeginLoopDirective
+!PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = taskloop
+!PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> NumTasks -> OmpNumTasksClause
+!PARSE-TREE-NEXT: Prescriptiveness = Strict
+!PARSE-TREE-NEXT: Scalar -> Integer -> Expr = '500_4'
+  !$omp taskloop num_tasks(strict: 500)
+  do i=1,10000
+    call loop_body(i)
+  end do
+  !$omp end taskloop
+end subroutine parallel_work
diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90
index 1a7a57b124e9bda..124f1a02d99fba7 100644
--- a/flang/test/Semantics/OpenMP/clause-validity01.f90
+++ b/flang/test/Semantics/OpenMP/clause-validity01.f90
@@ -476,14 +476,14 @@
 ! 2.13.1 master
 
   !$omp parallel
-  !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+  !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
   !$omp master
   a=3.14
   !$omp end master
   !$omp end parallel
 
   !$omp parallel
-  !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+  !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
   !ERROR: NUM_THREADS clause is not allowed on the MASTER directive
   !$omp master num_threads(4)
   a=3.14
diff --git a/flang/test/Semantics/OpenMP/default-none.f90 b/flang/test/Semantics/OpenMP/default-none.f90
index 11ba878ea779403..761c2385466a088 100644
--- a/flang/test/Semantics/OpenMP/default-none.f90
+++ b/flang/test/Semantics/OpenMP/default-none.f90
@@ -47,3 +47,14 @@ subroutine sb4
     end do loop
   !$omp end parallel
 end subroutine
+
+! Test that default(none) does not report an error for an assumed-size array
+subroutine sub( aaa)
+  real,dimension(*),intent(in)::aaa
+  integer::ip
+  real::ccc
+!$omp parallel do private(ip,ccc) default(none)
+  do ip = 1, 10
+     ccc= aaa(ip)
+  end do
+end subroutine sub
diff --git a/flang/test/Semantics/OpenMP/depend04.f90 b/flang/test/Semantics/OpenMP/depend04.f90
new file mode 100644
index 000000000000000..8bdddb017d2c9d9
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/depend04.f90
@@ -0,0 +1,10 @@
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=50
+
+subroutine f00
+  integer :: x
+  common /cc/ x
+!ERROR: Common block name ('cc') cannot appear in a DEPEND clause
+  !$omp task depend(in: /cc/)
+  x = 0
+  !$omp end task
+end
diff --git a/flang/test/Semantics/OpenMP/depend05.f90 b/flang/test/Semantics/OpenMP/depend05.f90
new file mode 100644
index 000000000000000..53fd82bd08a9eb2
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/depend05.f90
@@ -0,0 +1,9 @@
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=45 -Werror
+
+subroutine f00(x)
+  integer :: x(10)
+!WARNING: Iterator modifiers are not supported in OpenMP v4.5, try -fopenmp-version=50
+  !$omp task depend(iterator(i = 1:10), in: x(i))
+  x = 0
+  !$omp end task
+end
diff --git a/flang/test/Semantics/OpenMP/deprecation.f90 b/flang/test/Semantics/OpenMP/deprecation.f90
new file mode 100644
index 000000000000000..e04f43026bbce27
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/deprecation.f90
@@ -0,0 +1,59 @@
+! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -Werror
+
+! Check for deprecation of master directive and its combined/composite variants
+
+subroutine test_master()
+  integer :: c = 1
+!WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
+  !$omp master 
+  c = c + 1
+  !$omp end master
+end subroutine
+
+subroutine test_parallel_master
+  integer :: c = 2
+!WARNING: OpenMP directive PARALLEL MASTER has been deprecated, please use PARALLEL MASKED instead.
+  !$omp parallel master
+  c = c + 2
+  !$omp end parallel master
+end subroutine
+
+subroutine test_master_taskloop_simd()
+  integer :: i, j = 1
+!WARNING: OpenMP directive MASTER TASKLOOP SIMD has been deprecated, please use MASKED TASKLOOP SIMD instead.
+  !$omp master taskloop simd 
+  do i=1,10
+   j = j + 1
+  end do
+  !$omp end master taskloop simd
+end subroutine
+
+subroutine test_master_taskloop
+  integer :: i, j = 1
+!WARNING: OpenMP directive MASTER TASKLOOP has been deprecated, please use MASKED TASKLOOP instead.
+  !$omp master taskloop
+  do i=1,10
+   j = j + 1
+  end do
+  !$omp end master taskloop 
+end subroutine
+
+subroutine test_parallel_master_taskloop_simd
+  integer :: i, j = 1
+!WARNING: OpenMP directive PARALLEL MASTER TASKLOOP SIMD has been deprecated, please use PARALLEL_MASKED TASKLOOP SIMD instead.
+  !$omp parallel master taskloop simd 
+  do i=1,10
+   j = j + 1
+  end do
+  !$omp end parallel master taskloop simd
+end subroutine
+
+subroutine test_parallel_master_taskloop
+  integer :: i, j = 1
+!WARNING: OpenMP directive PARALLEL MASTER TASKLOOP has been deprecated, please use PARALLEL MASKED TASKLOOP instead.
+  !$omp parallel master taskloop
+  do i=1,10
+   j = j + 1
+  end do
+  !$omp end parallel master taskloop 
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/flush02.f90 b/flang/test/Semantics/OpenMP/flush02.f90
index f06719f302fd7a7..ed0cf6602d574af 100644
--- a/flang/test/Semantics/OpenMP/flush02.f90
+++ b/flang/test/Semantics/OpenMP/flush02.f90
@@ -80,7 +80,7 @@
 
   !$omp parallel num_threads(4)
     array = (/1, 2, 3, 4, 5, 6, 7, 8, 9, 10/)
-    !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+    !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
     !$omp master
       !$omp flush (array)
     !$omp end master
diff --git a/flang/test/Semantics/OpenMP/invalid-branch.f90 b/flang/test/Semantics/OpenMP/invalid-branch.f90
index ed9e4d268f65a8c..28aab8b122f3f2c 100644
--- a/flang/test/Semantics/OpenMP/invalid-branch.f90
+++ b/flang/test/Semantics/OpenMP/invalid-branch.f90
@@ -105,4 +105,12 @@ program omp_invalid_branch
   !$omp end parallel
   9 print *, "2nd alternate return"
 
+  !CHECK: invalid branch into an OpenMP structured block
+  goto 100
+  !$omp scope
+    100 continue
+    !CHECK: invalid branch leaving an OpenMP structured block
+    goto 200
+  !$omp end scope
+  200 continue
 end program
diff --git a/flang/test/Semantics/OpenMP/nested-barrier.f90 b/flang/test/Semantics/OpenMP/nested-barrier.f90
index aae283229e330d1..7c635d8e23cc0d1 100644
--- a/flang/test/Semantics/OpenMP/nested-barrier.f90
+++ b/flang/test/Semantics/OpenMP/nested-barrier.f90
@@ -75,7 +75,7 @@ program omp_nest_barrier
   end do
   !$omp end critical
 
-  !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+  !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
   !$omp master
   do i = 1, 10
     k = k + 1
@@ -108,7 +108,7 @@ program omp_nest_barrier
   end do
   !$omp end ordered
 
-  !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+  !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
   !$omp master
   do i = 1, 10
     !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region.
diff --git a/flang/test/Semantics/OpenMP/nested-master.f90 b/flang/test/Semantics/OpenMP/nested-master.f90
index 069de67cafae286..b21ca5d14159318 100644
--- a/flang/test/Semantics/OpenMP/nested-master.f90
+++ b/flang/test/Semantics/OpenMP/nested-master.f90
@@ -9,7 +9,7 @@ program omp_nest_master
   !$omp do
   do i = 1, 10
     k = k + 1
-    !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+    !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
     !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region.
     !$omp master
     j = j -1
@@ -17,7 +17,7 @@ program omp_nest_master
   end do
 
   !$omp sections 
-  !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+  !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
   !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region.
     !$omp master
     do i = 1, 10
@@ -27,7 +27,7 @@ program omp_nest_master
   !$omp end sections
 
   !$omp single 
-  !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+  !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
   !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region.
     !$omp master
     do i = 1, 10
@@ -41,7 +41,7 @@ program omp_nest_master
   !$omp task
   do i = 1, 10
     k = k + 1
-    !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+    !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
     !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region.
     !$omp master
     j = j -1
@@ -52,7 +52,7 @@ program omp_nest_master
   !$omp taskloop
   do i = 1, 10
     k = k + 1
-    !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+    !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
     !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region.
     !$omp master
     j = j -1
@@ -63,7 +63,7 @@ program omp_nest_master
   !$omp target parallel do simd
   do i = 1, 10
     k = k + 1
-    !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+    !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
     !ERROR: The only OpenMP constructs that can be encountered during execution of a 'SIMD' region are the `ATOMIC` construct, the `LOOP` construct, the `SIMD` construct and the `ORDERED` construct with the `SIMD` clause.
     !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region.
     !$omp master
@@ -75,7 +75,7 @@ program omp_nest_master
   !$omp critical
   do i = 1, 10
     k = k + 1
-    !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+    !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
     !$omp master
     j = j -1
     !$omp end master
@@ -85,7 +85,7 @@ program omp_nest_master
   !$omp ordered
   do i = 1, 10
     k = k + 1
-    !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+    !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
     !$omp master
     j = j -1
     !$omp end master
@@ -99,7 +99,7 @@ program omp_nest_master
     !$omp distribute
     do k =1, 10
       print *, "hello"
-      !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+      !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
       !$omp master
       j = j -1
       !$omp end master
@@ -116,7 +116,7 @@ program omp_nest_master
     !$omp distribute
     do k =1, 10
       print *, "hello"
-      !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+      !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
       !$omp master
       j = j -1
       !$omp end master
@@ -133,7 +133,7 @@ program omp_nest_master
     !$omp distribute
     do k =1, 10
       print *, "hello"
-      !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+      !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
       !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region.
       !$omp master
       j = j -1
@@ -151,7 +151,7 @@ program omp_nest_master
     !$omp distribute
     do k =1, 10
       print *, "hello"
-      !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+      !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
       !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region.
       !$omp master
       j = j -1
diff --git a/flang/test/Semantics/OpenMP/nested-teams.f90 b/flang/test/Semantics/OpenMP/nested-teams.f90
index f3b96b0ab439036..06eea12aba55956 100644
--- a/flang/test/Semantics/OpenMP/nested-teams.f90
+++ b/flang/test/Semantics/OpenMP/nested-teams.f90
@@ -42,7 +42,7 @@ program main
   !$omp end teams
   end do
 
-  !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+  !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
   !$omp master
   !ERROR: TEAMS region can only be strictly nested within the implicit parallel region or TARGET region
   !$omp teams
diff --git a/flang/test/Semantics/OpenMP/nested01.f90 b/flang/test/Semantics/OpenMP/nested01.f90
index 49c964ab86aa6bd..0936e4c1b45a5db 100644
--- a/flang/test/Semantics/OpenMP/nested01.f90
+++ b/flang/test/Semantics/OpenMP/nested01.f90
@@ -25,6 +25,13 @@
    !$omp end target
   enddo
   
+  !$omp do
+  do i = 1, N
+     !ERROR: A worksharing region may not be closely nested inside a worksharing, explicit task, taskloop, critical, ordered, atomic, or master region
+     !$omp scope
+     !$omp end scope
+  end do
+  !$omp end do
 
   !$omp do
   do i = 1, N
diff --git a/flang/test/Semantics/OpenMP/ordered-simd.f90 b/flang/test/Semantics/OpenMP/ordered-simd.f90
index ed52b7594910028..716dc42c28bb644 100644
--- a/flang/test/Semantics/OpenMP/ordered-simd.f90
+++ b/flang/test/Semantics/OpenMP/ordered-simd.f90
@@ -95,7 +95,7 @@ SUBROUTINE ORDERED_BAD(N)
 
   !$OMP CRITICAL  
     C =  C - A * B
-    !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+    !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
     !$OMP MASTER
     DO I = 1,N
       !ERROR: `ORDERED` region may not be closely nested inside of `CRITICAL`, `ORDERED`, explicit `TASK` or `TASKLOOP` region.
@@ -108,7 +108,7 @@ SUBROUTINE ORDERED_BAD(N)
 
   !$OMP ORDERED  
     C =  C - A * B
-    !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+    !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
     !$OMP MASTER
     DO I = 1,N
       !ERROR: `ORDERED` region may not be closely nested inside of `CRITICAL`, `ORDERED`, explicit `TASK` or `TASKLOOP` region.
@@ -121,7 +121,7 @@ SUBROUTINE ORDERED_BAD(N)
 
   !$OMP TASK  
     C =  C - A * B
-    !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+    !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
     !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region.
     !$OMP MASTER
     DO I = 1,N
@@ -136,7 +136,7 @@ SUBROUTINE ORDERED_BAD(N)
   !$OMP TASKLOOP
   DO J= 1,N  
     C =  C - A * B
-    !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead.
+    !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead.
     !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region.
     !$OMP MASTER
     DO I = 1,N
diff --git a/flang/test/Transforms/debug-index-type.fir b/flang/test/Transforms/debug-index-type.fir
new file mode 100644
index 000000000000000..20bd8471d7cf648
--- /dev/null
+++ b/flang/test/Transforms/debug-index-type.fir
@@ -0,0 +1,10 @@
+// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<>} {
+  func.func private @str(%arg0: index) -> i32 loc(#loc1)
+}
+#loc1 = loc("test.f90":5:1)
+
+// CHECK: #[[INT32_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer", sizeInBits = 32, encoding = DW_ATE_signed>
+// CHECK: #[[INT64_TY:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "integer", sizeInBits = 64, encoding = DW_ATE_signed>
+// CHECK: #llvm.di_subroutine_type<{{.*}}types = #[[INT32_TY]], #[[INT64_TY]]>
diff --git a/flang/test/Transforms/debug-tuple-type.fir b/flang/test/Transforms/debug-tuple-type.fir
new file mode 100644
index 000000000000000..c9b0d16c06e1ae2
--- /dev/null
+++ b/flang/test/Transforms/debug-tuple-type.fir
@@ -0,0 +1,15 @@
+// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<>} {
+  func.func private @fn1(!fir.ref<tuple<f64, f64>>)
+  func.func private @_FortranAioOutputDerivedType(!fir.ref<tuple<>>)
+}
+
+// CHECK: #[[F64:.*]] = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "real", sizeInBits = 64, encoding = DW_ATE_float>
+// CHECK: #[[CU:.*]] = #llvm.di_compile_unit<{{.*}}>
+// CHECK: #[[DTY1:.*]] = #llvm.di_derived_type<tag = DW_TAG_member, name = "", baseType = #[[F64]], sizeInBits = 64, alignInBits = {{.*}}>
+// CHECK: #[[DTY2:.*]] = #llvm.di_derived_type<tag = DW_TAG_member, name = "", baseType = #[[F64]], sizeInBits = 64, alignInBits = {{.*}}, offsetInBits = {{.*}}>
+// CHECK: #[[COM_TY1:.*]] = #llvm.di_composite_type<tag = DW_TAG_structure_type, name = "", file = #{{.*}}, scope = #[[CU]]{{.*}}elements = #[[DTY1]], #[[DTY2]]>
+// CHECK: #[[COM_TY2:.*]] = #llvm.di_composite_type<tag = DW_TAG_structure_type, name = "", file = #{{.*}}, scope = #[[CU]]>
+// CHECK: #llvm.di_subroutine_type<callingConvention = DW_CC_normal, types = #di_null_type, #[[COM_TY1]]>
+// CHECK: #llvm.di_subroutine_type<callingConvention = DW_CC_normal, types = #di_null_type, #[[COM_TY2]]>
diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp
index af38bf272b02b16..fe5e36f704c76cf 100644
--- a/flang/tools/bbc/bbc.cpp
+++ b/flang/tools/bbc/bbc.cpp
@@ -234,6 +234,12 @@ static llvm::cl::opt<bool> integerWrapAround(
     llvm::cl::desc("Treat signed integer overflow as two's complement"),
     llvm::cl::init(false));
 
+// TODO: integrate this option with the above
+static llvm::cl::opt<bool>
+    setNSW("integer-overflow",
+           llvm::cl::desc("add nsw flag to internal operations"),
+           llvm::cl::init(false));
+
 #define FLANG_EXCLUDE_CODEGEN
 #include "flang/Optimizer/Passes/CommandLineOpts.h"
 #include "flang/Optimizer/Passes/Pipelines.h"
@@ -375,6 +381,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR(
   loweringOptions.setNoPPCNativeVecElemOrder(enableNoPPCNativeVecElemOrder);
   loweringOptions.setLowerToHighLevelFIR(useHLFIR || emitHLFIR);
   loweringOptions.setIntegerWrapAround(integerWrapAround);
+  loweringOptions.setNSWOnLoopVarInc(setNSW);
   std::vector<Fortran::lower::EnvironmentDefault> envDefaults = {};
   Fortran::frontend::TargetOptions targetOpts;
   Fortran::frontend::CodeGenOptions cgOpts;
@@ -460,7 +467,7 @@ static llvm::LogicalResult convertFortranSourceToMLIR(
 
     // Add O2 optimizer pass pipeline.
     MLIRToLLVMPassPipelineConfig config(llvm::OptimizationLevel::O2);
-    config.NSWOnLoopVarInc = !integerWrapAround;
+    config.NSWOnLoopVarInc = setNSW;
     fir::registerDefaultInlinerPass(config);
     fir::createDefaultFIROptimizerPassPipeline(pm, config);
   }
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 71c6e874429fedf..b3f94a581c8ad90 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -607,6 +607,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.canonicalizef16
     libc.src.math.ceilf16
     libc.src.math.copysignf16
+    libc.src.math.cospif16
     # TODO: aarch64 bug
     # Please see https://github.com/llvm/llvm-project/pull/100632#issuecomment-2258772681
     # libc.src.math.expf16
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 9bc63edf06f28c9..a2fb97d04584d5b 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -611,6 +611,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.ceilf16
     libc.src.math.copysignf16
     libc.src.math.coshf16
+    libc.src.math.cospif16
     libc.src.math.exp10f16
     libc.src.math.exp10m1f16
     libc.src.math.exp2f16
diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
index ce4df92393ce7f4..a50e054622e1a45 100644
--- a/libc/docs/math/index.rst
+++ b/libc/docs/math/index.rst
@@ -280,7 +280,7 @@ Higher Math Functions
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | cosh      | |check|          |                 |                        | |check|              |                        | 7.12.5.4               | F.10.2.4                   |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| cospi     | |check|          |                 |                        |                      |                        | 7.12.4.12              | F.10.1.12                  |
+| cospi     | |check|          |                 |                        | |check|              |                        | 7.12.4.12              | F.10.1.12                  |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | dsqrt     | N/A              | N/A             |   |check|              | N/A                  |       |check|\*        | 7.12.14.6              | F.10.11                    |
 +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
diff --git a/libc/hdr/CMakeLists.txt b/libc/hdr/CMakeLists.txt
index 13dc892978bb87a..80545ee4b359f4e 100644
--- a/libc/hdr/CMakeLists.txt
+++ b/libc/hdr/CMakeLists.txt
@@ -51,10 +51,13 @@ add_proxy_header_library(
     libc.include.llvm-libc-macros.generic_error_number_macros
 )
 
+add_header_library(fcntl_overlay HDRS fcntl_overlay.h)
 add_proxy_header_library(
   fcntl_macros
   HDRS
     fcntl_macros.h
+  DEPENDS
+    .fcntl_overlay
   FULL_BUILD_DEPENDS
     libc.include.llvm-libc-macros.fcntl_macros
     libc.include.fcntl
diff --git a/libc/hdr/fcntl_macros.h b/libc/hdr/fcntl_macros.h
index 828cb984c0cb148..3a1ddeb0a2da1d9 100644
--- a/libc/hdr/fcntl_macros.h
+++ b/libc/hdr/fcntl_macros.h
@@ -15,7 +15,7 @@
 
 #else // Overlay mode
 
-#include <fcntl.h>
+#include "hdr/fcntl_overlay.h"
 
 #endif // LLVM_LIBC_FULL_BUILD
 
diff --git a/libc/hdr/fcntl_overlay.h b/libc/hdr/fcntl_overlay.h
new file mode 100644
index 000000000000000..c1cc98b0ebb2c7e
--- /dev/null
+++ b/libc/hdr/fcntl_overlay.h
@@ -0,0 +1,37 @@
+//===-- Including fcntl.h in overlay mode ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_FCNTL_OVERLAY_H
+#define LLVM_LIBC_HDR_FCNTL_OVERLAY_H
+
+#ifdef LIBC_FULL_BUILD
+#error "This header should only be included in overlay mode"
+#endif
+
+// Overlay mode
+
+// glibc <fcntl.h> header might provide extern inline definitions for a few
+// functions, causing external alias errors.  They are guarded by
+// `__USE_FORTIFY_LEVEL` (derived from `_FORTIFY_SOURCE`), which is temporarily
+// set to 0 while <fcntl.h> is included and restored afterwards.
+
+#ifdef __USE_FORTIFY_LEVEL
+#define LIBC_OLD_USE_FORTIFY_LEVEL __USE_FORTIFY_LEVEL
+#undef __USE_FORTIFY_LEVEL
+#define __USE_FORTIFY_LEVEL 0
+#endif
+
+#include <fcntl.h>
+
+#ifdef LIBC_OLD_USE_FORTIFY_LEVEL
+#undef __USE_FORTIFY_LEVEL
+#define __USE_FORTIFY_LEVEL LIBC_OLD_USE_FORTIFY_LEVEL
+#undef LIBC_OLD_USE_FORTIFY_LEVEL
+#endif
+
+#endif // LLVM_LIBC_HDR_FCNTL_OVERLAY_H
diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt
index fab5245816bbe19..e45979857d79554 100644
--- a/libc/hdr/types/CMakeLists.txt
+++ b/libc/hdr/types/CMakeLists.txt
@@ -46,6 +46,17 @@ add_proxy_header_library(
     libc.include.llvm-libc-types.struct_timespec
 )
 
+add_proxy_header_library(
+  mode_t
+  HDRS
+    mode_t.h
+  DEPENDS
+    ../fcntl_overlay
+  FULL_BUILD_DEPENDS
+    libc.include.llvm-libc-types.mode_t
+    libc.include.fcntl
+)
+
 add_proxy_header_library(
   fenv_t
   HDRS
diff --git a/libc/hdr/types/mode_t.h b/libc/hdr/types/mode_t.h
new file mode 100644
index 000000000000000..abbbdb0a09d7b63
--- /dev/null
+++ b/libc/hdr/types/mode_t.h
@@ -0,0 +1,22 @@
+//===-- Proxy header for mode_t -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_HDR_MODE_T_H
+#define LLVM_LIBC_HDR_MODE_T_H
+
+#ifdef LIBC_FULL_BUILD
+
+#include "include/llvm-libc-types/mode_t.h"
+
+#else // Overlay mode
+
+#include "hdr/fcntl_overlay.h"
+
+#endif // LLVM_LIBC_FULL_BUILD
+
+#endif // LLVM_LIBC_HDR_MODE_T_H
diff --git a/libc/newhdrgen/yaml/math.yaml b/libc/newhdrgen/yaml/math.yaml
index fe07803cff06f84..3cc4b599c777bff 100644
--- a/libc/newhdrgen/yaml/math.yaml
+++ b/libc/newhdrgen/yaml/math.yaml
@@ -206,6 +206,13 @@ functions:
     return_type: float
     arguments:
       - type: float
+  - name: cospif16
+    standards:
+      - stdc
+    return_type: _Float16
+    arguments:
+      - type: _Float16
+    guard: LIBC_TYPES_HAS_FLOAT16
   - name: coshf16
     standards:
       - stdc
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 4785895b562b5e3..14a3acff8fae935 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -277,6 +277,7 @@ add_header_library(
   DEPENDS
     .integer_to_string
     libc.src.__support.OSUtil.osutil
+    libc.src.__support.macros.optimization
 )
 
 add_header_library(
diff --git a/libc/src/__support/File/linux/CMakeLists.txt b/libc/src/__support/File/linux/CMakeLists.txt
index 5abbf11b3671cd2..84e3d5608361e19 100644
--- a/libc/src/__support/File/linux/CMakeLists.txt
+++ b/libc/src/__support/File/linux/CMakeLists.txt
@@ -7,7 +7,7 @@ add_object_library(
     file.h
     lseekImpl.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_syscall
     libc.include.sys_stat
     libc.src.__support.CPP.new
@@ -55,7 +55,7 @@ add_object_library(
   SRCS
     dir.cpp
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
     libc.src.__support.error_or
diff --git a/libc/src/__support/File/linux/dir.cpp b/libc/src/__support/File/linux/dir.cpp
index fc90ff097e4606d..5fe44fa8297b68d 100644
--- a/libc/src/__support/File/linux/dir.cpp
+++ b/libc/src/__support/File/linux/dir.cpp
@@ -12,7 +12,7 @@
 #include "src/__support/error_or.h"
 #include "src/__support/macros/config.h"
 
-#include <fcntl.h>       // For open flags
+#include "hdr/fcntl_macros.h" // For open flags
 #include <sys/syscall.h> // For syscall numbers
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/__support/File/linux/file.cpp b/libc/src/__support/File/linux/file.cpp
index 22292336f300e21..824c1f200e8c5b1 100644
--- a/libc/src/__support/File/linux/file.cpp
+++ b/libc/src/__support/File/linux/file.cpp
@@ -18,7 +18,7 @@
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h" // For error macros
 
-#include <fcntl.h>       // For mode_t and other flags to the open syscall
+#include "hdr/fcntl_macros.h" // For mode_t and other flags to the open syscall
 #include <sys/stat.h>    // For S_IS*, S_IF*, and S_IR* flags.
 #include <sys/syscall.h> // For syscall numbers
 
diff --git a/libc/src/__support/libc_assert.h b/libc/src/__support/libc_assert.h
index e21a58a0c8aad94..3db179ff6721248 100644
--- a/libc/src/__support/libc_assert.h
+++ b/libc/src/__support/libc_assert.h
@@ -24,7 +24,8 @@
 #include "src/__support/OSUtil/exit.h"
 #include "src/__support/OSUtil/io.h"
 #include "src/__support/integer_to_string.h"
-#include "src/__support/macros/attributes.h" // For LIBC_INLINE
+#include "src/__support/macros/attributes.h"   // For LIBC_INLINE
+#include "src/__support/macros/optimization.h" // For LIBC_UNLIKELY
 
 namespace LIBC_NAMESPACE_DECL {
 
@@ -71,7 +72,7 @@ LIBC_INLINE void report_assertion_failure(const char *assertion,
 
 #define LIBC_ASSERT(COND)                                                      \
   do {                                                                         \
-    if (!(COND)) {                                                             \
+    if (LIBC_UNLIKELY(!(COND))) {                                              \
       LIBC_NAMESPACE::write_to_stderr(__FILE__ ":" __LIBC_LINE_STR__           \
                                                ": Assertion failed: '" #COND   \
                                                "' in function: '");            \
diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index b6796f40adce7bd..fa11458f99b6c9c 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -79,7 +79,7 @@ add_object_library(
     .futex_utils
     libc.config.app_h
     libc.include.sys_syscall
-    libc.include.fcntl
+    libc.hdr.fcntl_macros  
     libc.src.errno.errno
     libc.src.__support.CPP.atomic
     libc.src.__support.CPP.stringstream
diff --git a/libc/src/__support/threads/linux/thread.cpp b/libc/src/__support/threads/linux/thread.cpp
index ee3f63fa3cde32e..c531d74c533550d 100644
--- a/libc/src/__support/threads/linux/thread.cpp
+++ b/libc/src/__support/threads/linux/thread.cpp
@@ -22,7 +22,7 @@
 #include <arm_acle.h>
 #endif
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <linux/param.h> // For EXEC_PAGESIZE.
 #include <linux/prctl.h> // For PR_SET_NAME
 #include <linux/sched.h> // For CLONE_* flags.
diff --git a/libc/src/assert/assert.h b/libc/src/assert/assert.h
index 6f352af1988b371..1ea19ea5554f0aa 100644
--- a/libc/src/assert/assert.h
+++ b/libc/src/assert/assert.h
@@ -18,8 +18,18 @@
 #ifdef NDEBUG
 #define assert(e) (void)0
 #else
+
+#ifdef __has_builtin
+#if __has_builtin(__builtin_expect)
+#define __LIBC_ASSERT_LIKELY(e) __builtin_expect(e, 1)
+#endif
+#endif
+#ifndef __LIBC_ASSERT_LIKELY
+#define __LIBC_ASSERT_LIKELY(e) e
+#endif
+
 #define assert(e)                                                              \
-  ((e) ? (void)0                                                               \
-       : LIBC_NAMESPACE::__assert_fail(#e, __FILE__, __LINE__,                 \
-                                       __PRETTY_FUNCTION__))
+  (__LIBC_ASSERT_LIKELY(e) ? (void)0                                           \
+                           : LIBC_NAMESPACE::__assert_fail(                    \
+                                 #e, __FILE__, __LINE__, __PRETTY_FUNCTION__))
 #endif // NDEBUG
diff --git a/libc/src/fcntl/creat.h b/libc/src/fcntl/creat.h
index e180e17c2578870..3e00427638a36a4 100644
--- a/libc/src/fcntl/creat.h
+++ b/libc/src/fcntl/creat.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_LIBC_SRC_FCNTL_CREAT_H
 #define LLVM_LIBC_SRC_FCNTL_CREAT_H
 
+#include "hdr/fcntl_macros.h"
 #include "src/__support/macros/config.h"
-#include <fcntl.h>
 
 namespace LIBC_NAMESPACE_DECL {
 
diff --git a/libc/src/fcntl/linux/CMakeLists.txt b/libc/src/fcntl/linux/CMakeLists.txt
index ee8ae63b8cf062a..580db16cd413205 100644
--- a/libc/src/fcntl/linux/CMakeLists.txt
+++ b/libc/src/fcntl/linux/CMakeLists.txt
@@ -5,7 +5,7 @@ add_entrypoint_object(
   HDRS
     ../creat.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.src.__support.OSUtil.osutil
     libc.src.errno.errno
 )
@@ -17,7 +17,7 @@ add_entrypoint_object(
   HDRS
     ../fcntl.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.src.__support.OSUtil.osutil
 )
 
@@ -28,7 +28,8 @@ add_entrypoint_object(
   HDRS
     ../open.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.types.mode_t
+    libc.hdr.fcntl_macros
     libc.src.__support.OSUtil.osutil
     libc.src.errno.errno
 )
@@ -40,7 +41,7 @@ add_entrypoint_object(
   HDRS
     ../openat.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.types.mode_t
     libc.src.__support.OSUtil.osutil
     libc.src.errno.errno
 )
diff --git a/libc/src/fcntl/linux/creat.cpp b/libc/src/fcntl/linux/creat.cpp
index 2c5b5d736a3be3f..23abae243aed92c 100644
--- a/libc/src/fcntl/linux/creat.cpp
+++ b/libc/src/fcntl/linux/creat.cpp
@@ -13,7 +13,7 @@
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/fcntl/linux/open.cpp b/libc/src/fcntl/linux/open.cpp
index 79b7b2b32c887bb..8b699ecdd2043c7 100644
--- a/libc/src/fcntl/linux/open.cpp
+++ b/libc/src/fcntl/linux/open.cpp
@@ -13,7 +13,8 @@
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
+#include "hdr/types/mode_t.h"
 #include <stdarg.h>
 #include <sys/syscall.h> // For syscall numbers.
 
diff --git a/libc/src/fcntl/linux/openat.cpp b/libc/src/fcntl/linux/openat.cpp
index 0862082c22ebfcd..6063d9c00ad6c40 100644
--- a/libc/src/fcntl/linux/openat.cpp
+++ b/libc/src/fcntl/linux/openat.cpp
@@ -13,7 +13,7 @@
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
 
-#include <fcntl.h>
+#include "hdr/types/mode_t.h"
 #include <stdarg.h>
 #include <sys/syscall.h> // For syscall numbers.
 
diff --git a/libc/src/fcntl/open.h b/libc/src/fcntl/open.h
index 19bb53c2e320319..11f0ae53795318c 100644
--- a/libc/src/fcntl/open.h
+++ b/libc/src/fcntl/open.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_LIBC_SRC_FCNTL_OPEN_H
 #define LLVM_LIBC_SRC_FCNTL_OPEN_H
 
+#include "hdr/fcntl_macros.h"
 #include "src/__support/macros/config.h"
-#include <fcntl.h>
 
 namespace LIBC_NAMESPACE_DECL {
 
diff --git a/libc/src/fcntl/openat.h b/libc/src/fcntl/openat.h
index d09791a84f73529..051c8a2304dcbac 100644
--- a/libc/src/fcntl/openat.h
+++ b/libc/src/fcntl/openat.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_LIBC_SRC_FCNTL_OPENAT_H
 #define LLVM_LIBC_SRC_FCNTL_OPENAT_H
 
+#include "hdr/fcntl_macros.h"
 #include "src/__support/macros/config.h"
-#include <fcntl.h>
 
 namespace LIBC_NAMESPACE_DECL {
 
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index cb4817348cbba5e..80c1867d2116f62 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -95,6 +95,7 @@ add_math_entrypoint_object(coshf)
 add_math_entrypoint_object(coshf16)
 
 add_math_entrypoint_object(cospif)
+add_math_entrypoint_object(cospif16)
 
 add_math_entrypoint_object(daddl)
 add_math_entrypoint_object(daddf128)
diff --git a/libc/src/math/cbrt.h b/libc/src/math/cbrt.h
index a7d5fe80e57b3c4..8cf7d9b221df3f9 100644
--- a/libc/src/math/cbrt.h
+++ b/libc/src/math/cbrt.h
@@ -9,10 +9,12 @@
 #ifndef LLVM_LIBC_SRC_MATH_CBRT_H
 #define LLVM_LIBC_SRC_MATH_CBRT_H
 
-namespace LIBC_NAMESPACE {
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
 
 double cbrt(double x);
 
-} // namespace LIBC_NAMESPACE
+} // namespace LIBC_NAMESPACE_DECL
 
 #endif // LLVM_LIBC_SRC_MATH_CBRT_H
diff --git a/libc/src/math/cospif16.h b/libc/src/math/cospif16.h
new file mode 100644
index 000000000000000..ef9625dfed45f6f
--- /dev/null
+++ b/libc/src/math/cospif16.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for cospif16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_COSPIF16_H
+#define LLVM_LIBC_SRC_MATH_COSPIF16_H
+
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+float16 cospif16(float16 x);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_COSPIF16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 35e7347b91362e4..ca27759d3212f27 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -351,6 +351,17 @@ add_header_library(
     libc.src.__support.common
 )
 
+add_header_library(
+  sincosf16_utils
+  HDRS
+    sincosf16_utils.h
+  DEPENDS
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.nearest_integer
+    libc.src.__support.common  
+)
+
 add_header_library(
   sincos_eval
   HDRS
@@ -422,6 +433,25 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  cospif16
+  SRCS
+    cospif16.cpp
+  HDRS
+    ../cospif16.h
+  DEPENDS
+    .sincosf16_utils
+    libc.hdr.errno_macros
+    libc.hdr.fenv_macros
+    libc.src.__support.FPUtil.cast
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.macros.optimization
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   sin
   SRCS
@@ -535,14 +565,14 @@ add_entrypoint_object(
   HDRS
     ../sinpif16.h
   DEPENDS
-    libc.src.__support.common
+    .sincosf16_utils
+    libc.hdr.errno_macros
+    libc.hdr.fenv_macros
     libc.src.__support.FPUtil.cast
     libc.src.__support.FPUtil.fenv_impl
-    libc.src.__support.FPUtil.fp_bits 
+    libc.src.__support.FPUtil.fp_bits
     libc.src.__support.FPUtil.multiply_add
-    libc.src.__support.FPUtil.nearest_integer
-    libc.src.__support.FPUtil.polyeval
-    libc.src.__support.macros.properties.types
+    libc.src.__support.macros.optimization
   COMPILE_OPTIONS
     -O3
 )
diff --git a/libc/src/math/generic/atan2.cpp b/libc/src/math/generic/atan2.cpp
index c39deebca4d40ec..1b16e15d29d0b31 100644
--- a/libc/src/math/generic/atan2.cpp
+++ b/libc/src/math/generic/atan2.cpp
@@ -230,8 +230,8 @@ LLVM_LIBC_FUNCTION(double, atan2, (double y, double x)) {
   if (LIBC_UNLIKELY(max_exp > 0x7ffU - 128U || min_exp < 128U)) {
     if (x_bits.is_nan() || y_bits.is_nan())
       return FPBits::quiet_nan().get_val();
-    unsigned x_except = x_abs == 0 ? 0 : (FPBits(x_abs).is_inf() ? 2 : 1);
-    unsigned y_except = y_abs == 0 ? 0 : (FPBits(y_abs).is_inf() ? 2 : 1);
+    unsigned x_except = x == 0.0 ? 0 : (FPBits(x_abs).is_inf() ? 2 : 1);
+    unsigned y_except = y == 0.0 ? 0 : (FPBits(y_abs).is_inf() ? 2 : 1);
 
     // Exceptional cases:
     //   EXCEPT[y_except][x_except][x_is_neg]
diff --git a/libc/src/math/generic/cbrt.cpp b/libc/src/math/generic/cbrt.cpp
index 036664c2aafaf46..ee7d69b2c211fac 100644
--- a/libc/src/math/generic/cbrt.cpp
+++ b/libc/src/math/generic/cbrt.cpp
@@ -151,9 +151,10 @@ LLVM_LIBC_FUNCTION(double, cbrt, (double x)) {
 
   if (LIBC_UNLIKELY(x_abs < FPBits::min_normal().uintval() ||
                     x_abs >= FPBits::inf().uintval())) {
-    if (x_abs == 0 || x_abs >= FPBits::inf().uintval())
+    if (x == 0.0 || x_abs >= FPBits::inf().uintval())
       // x is 0, Inf, or NaN.
-      return x;
+      // Make sure it works for FTZ/DAZ modes.
+      return static_cast<double>(x + x);
 
     // x is non-zero denormal number.
     // Normalize x.
@@ -235,10 +236,10 @@ LLVM_LIBC_FUNCTION(double, cbrt, (double x)) {
 
   // Lambda function to update the exponent of the result.
   auto update_exponent = [=](double r) -> double {
-    uint64_t r_m = FPBits(r).uintval() & 0x800F'FFFF'FFFF'FFFF;
+    uint64_t r_m = FPBits(r).uintval() - 0x3FF0'0000'0000'0000;
     // Adjust exponent and sign.
     uint64_t r_bits =
-        r_m | (static_cast<uint64_t>(out_e) << FPBits::FRACTION_LEN);
+        r_m + (static_cast<uint64_t>(out_e) << FPBits::FRACTION_LEN);
     return FPBits(r_bits).get_val();
   };
 
diff --git a/libc/src/math/generic/cbrtf.cpp b/libc/src/math/generic/cbrtf.cpp
index 313961bf356b830..0abbf6e879421c0 100644
--- a/libc/src/math/generic/cbrtf.cpp
+++ b/libc/src/math/generic/cbrtf.cpp
@@ -93,9 +93,10 @@ LLVM_LIBC_FUNCTION(float, cbrtf, (float x)) {
   uint32_t x_abs = x_bits.uintval() & 0x7fff'ffff;
   uint32_t sign_bit = (x_bits.uintval() >> 31) << DoubleBits::EXP_LEN;
 
-  if (LIBC_UNLIKELY(x_abs == 0 || x_abs >= 0x7f80'0000)) {
+  if (LIBC_UNLIKELY(x == 0.0f || x_abs >= 0x7f80'0000)) {
     // x is 0, Inf, or NaN.
-    return x;
+    // Make sure it works for FTZ/DAZ modes.
+    return x + x;
   }
 
   double xd = static_cast<double>(x);
diff --git a/libc/src/math/generic/cospif16.cpp b/libc/src/math/generic/cospif16.cpp
new file mode 100644
index 000000000000000..dd8c7ab6afa3d6a
--- /dev/null
+++ b/libc/src/math/generic/cospif16.cpp
@@ -0,0 +1,81 @@
+//===-- Half-precision cospif function ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/cospif16.h"
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "sincosf16_utils.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/cast.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/macros/optimization.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+LLVM_LIBC_FUNCTION(float16, cospif16, (float16 x)) {
+  using FPBits = typename fputil::FPBits<float16>;
+  FPBits xbits(x);
+
+  uint16_t x_u = xbits.uintval();
+  uint16_t x_abs = x_u & 0x7fff;
+  float xf = x;
+
+  // Range reduction:
+  // For |x| > 1/32, we perform range reduction as follows:
+  // Find k and y such that:
+  //   x = (k + y) * 1/32
+  //   k is an integer
+  //   |y| < 0.5
+  //
+  // This is done by performing:
+  //   k = round(x * 32)
+  //   y = x * 32 - k
+  //
+  // Once k and y are computed, we then deduce the answer by the cosine of sum
+  // formula:
+  //   cos(x * pi) = cos((k + y) * pi/32)
+  //               = cos(k * pi/32) * cos(y * pi/32) -
+  //                 sin(y * pi/32) * sin(k * pi/32)
+
+  // For signed zeros
+  if (LIBC_UNLIKELY(x_abs == 0U))
+    return fputil::cast<float16>(1.0f);
+
+  // Numbers greater or equal to 2^10 are integers, or infinity, or NaN
+  if (LIBC_UNLIKELY(x_abs >= 0x6400)) {
+    if (LIBC_UNLIKELY(x_abs <= 0x67FF))
+      return fputil::cast<float16>((x_abs & 0x1) ? -1.0f : 1.0f);
+
+    // Check for NaN or infinity values
+    if (LIBC_UNLIKELY(x_abs >= 0x7c00)) {
+      // If value is equal to infinity
+      if (x_abs == 0x7c00) {
+        fputil::set_errno_if_required(EDOM);
+        fputil::raise_except_if_required(FE_INVALID);
+      }
+
+      return x + FPBits::quiet_nan().get_val();
+    }
+
+    return fputil::cast<float16>(1.0f);
+  }
+
+  float sin_k, cos_k, sin_y, cosm1_y;
+  sincospif16_eval(xf, sin_k, cos_k, sin_y, cosm1_y);
+
+  if (LIBC_UNLIKELY(sin_y == 0 && cos_k == 0))
+    return fputil::cast<float16>(0.0f);
+
+  // Since cosm1_y = cos_y - 1, we have:
+  //   cos(x * pi) = cos_k * cosm1_y + cos_k - sin_k * sin_y
+  return fputil::cast<float16>(fputil::multiply_add(
+      cos_k, cosm1_y, fputil::multiply_add(-sin_k, sin_y, cos_k)));
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/log.cpp b/libc/src/math/generic/log.cpp
index 57c70e31730bf6b..4302c64c8abac86 100644
--- a/libc/src/math/generic/log.cpp
+++ b/libc/src/math/generic/log.cpp
@@ -749,7 +749,7 @@ LLVM_LIBC_FUNCTION(double, log, (double x)) {
 
   if (LIBC_UNLIKELY(xbits.uintval() < FPBits_t::min_normal().uintval() ||
                     xbits.uintval() > FPBits_t::max_normal().uintval())) {
-    if (xbits.is_zero()) {
+    if (x == 0.0) {
       // return -Inf and raise FE_DIVBYZERO.
       fputil::set_errno_if_required(ERANGE);
       fputil::raise_except_if_required(FE_DIVBYZERO);
diff --git a/libc/src/math/generic/log10.cpp b/libc/src/math/generic/log10.cpp
index b99b22b024fe3cc..7df57ef85b81b96 100644
--- a/libc/src/math/generic/log10.cpp
+++ b/libc/src/math/generic/log10.cpp
@@ -751,7 +751,7 @@ LLVM_LIBC_FUNCTION(double, log10, (double x)) {
 
   if (LIBC_UNLIKELY(xbits.uintval() < FPBits_t::min_normal().uintval() ||
                     xbits.uintval() > FPBits_t::max_normal().uintval())) {
-    if (xbits.is_zero()) {
+    if (x == 0.0) {
       // return -Inf and raise FE_DIVBYZERO.
       fputil::set_errno_if_required(ERANGE);
       fputil::raise_except_if_required(FE_DIVBYZERO);
diff --git a/libc/src/math/generic/log10f.cpp b/libc/src/math/generic/log10f.cpp
index f7dd85cc08bf036..c635fa4ef9b63fa 100644
--- a/libc/src/math/generic/log10f.cpp
+++ b/libc/src/math/generic/log10f.cpp
@@ -164,7 +164,7 @@ LLVM_LIBC_FUNCTION(float, log10f, (float x)) {
 
   if (LIBC_UNLIKELY(x_u < FPBits::min_normal().uintval() ||
                     x_u > FPBits::max_normal().uintval())) {
-    if (xbits.is_zero()) {
+    if (x == 0.0f) {
       // Return -inf and raise FE_DIVBYZERO
       fputil::set_errno_if_required(ERANGE);
       fputil::raise_except_if_required(FE_DIVBYZERO);
diff --git a/libc/src/math/generic/log1p.cpp b/libc/src/math/generic/log1p.cpp
index f301a5aba3a57c4..43eb8a924aef476 100644
--- a/libc/src/math/generic/log1p.cpp
+++ b/libc/src/math/generic/log1p.cpp
@@ -927,8 +927,8 @@ LLVM_LIBC_FUNCTION(double, log1p, (double x)) {
       //   log(1 + x) = nextafter(x, -inf) for FE_DOWNWARD, or
       //                                       FE_TOWARDZERO and x > 0,
       //              = x                  otherwise.
-      if (LIBC_UNLIKELY(xbits.is_zero()))
-        return x;
+      if (x == 0.0)
+        return x + x; // Handle FTZ/DAZ correctly.
 
       volatile float tp = 1.0f;
       volatile float tn = -1.0f;
@@ -943,7 +943,7 @@ LLVM_LIBC_FUNCTION(double, log1p, (double x)) {
         return FPBits_t(x_u + 1).get_val();
       }
 
-      return x;
+      return (x + x == 0.0) ? x + x : x;
     }
     x_dd = fputil::exact_add(1.0, x);
   }
diff --git a/libc/src/math/generic/log2.cpp b/libc/src/math/generic/log2.cpp
index 7d868e2f6f61985..37ea0c8f1343155 100644
--- a/libc/src/math/generic/log2.cpp
+++ b/libc/src/math/generic/log2.cpp
@@ -871,7 +871,7 @@ LLVM_LIBC_FUNCTION(double, log2, (double x)) {
 
   if (LIBC_UNLIKELY(xbits.uintval() < FPBits_t::min_normal().uintval() ||
                     xbits.uintval() > FPBits_t::max_normal().uintval())) {
-    if (xbits.is_zero()) {
+    if (x == 0.0) {
       // return -Inf and raise FE_DIVBYZERO.
       fputil::set_errno_if_required(ERANGE);
       fputil::raise_except_if_required(FE_DIVBYZERO);
diff --git a/libc/src/math/generic/log2f.cpp b/libc/src/math/generic/log2f.cpp
index 9cad02d796b189c..111f3f130bcab18 100644
--- a/libc/src/math/generic/log2f.cpp
+++ b/libc/src/math/generic/log2f.cpp
@@ -72,7 +72,7 @@ LLVM_LIBC_FUNCTION(float, log2f, (float x)) {
   // Exceptional inputs.
   if (LIBC_UNLIKELY(x_u < FPBits::min_normal().uintval() ||
                     x_u > FPBits::max_normal().uintval())) {
-    if (xbits.is_zero()) {
+    if (x == 0.0f) {
       fputil::set_errno_if_required(ERANGE);
       fputil::raise_except_if_required(FE_DIVBYZERO);
       return FPBits::inf(Sign::NEG).get_val();
diff --git a/libc/src/math/generic/logf.cpp b/libc/src/math/generic/logf.cpp
index f8ecf320568ac71..30c00edafe21d86 100644
--- a/libc/src/math/generic/logf.cpp
+++ b/libc/src/math/generic/logf.cpp
@@ -82,7 +82,7 @@ LLVM_LIBC_FUNCTION(float, logf, (float x)) {
     }
     // Subnormal inputs.
     if (LIBC_UNLIKELY(x_u < FPBits::min_normal().uintval())) {
-      if (x_u == 0) {
+      if (x == 0.0f) {
         // Return -inf and raise FE_DIVBYZERO
         fputil::set_errno_if_required(ERANGE);
         fputil::raise_except_if_required(FE_DIVBYZERO);
diff --git a/libc/src/math/generic/pow.cpp b/libc/src/math/generic/pow.cpp
index 181d3d40b3c9adf..213dbd959039c30 100644
--- a/libc/src/math/generic/pow.cpp
+++ b/libc/src/math/generic/pow.cpp
@@ -228,16 +228,18 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
                     x_u >= FPBits::inf().uintval() ||
                     x_u < FPBits::min_normal().uintval())) {
     // Exceptional exponents.
-    switch (y_a) {
-    case 0: // y = +-0.0
+    if (y == 0.0)
       return 1.0;
+
+    switch (y_a) {
     case 0x3fe0'0000'0000'0000: { // y = +-0.5
       // TODO: speed up x^(-1/2) with rsqrt(x) when available.
-      if (LIBC_UNLIKELY(!y_sign && (x_u == FPBits::zero(Sign::NEG).uintval() ||
-                                    x_u == FPBits::inf(Sign::NEG).uintval()))) {
+      if (LIBC_UNLIKELY(
+              (x == 0.0 || x_u == FPBits::inf(Sign::NEG).uintval()))) {
         // pow(-0, 1/2) = +0
         // pow(-inf, 1/2) = +inf
-        return FPBits(x_abs).get_val();
+        // Make sure it works correctly for FTZ/DAZ.
+        return y_sign ? 1.0 / (x * x) : (x * x);
       }
       return y_sign ? (1.0 / fputil::sqrt<double>(x)) : fputil::sqrt<double>(x);
     }
@@ -269,7 +271,7 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
           return 1.0;
         }
 
-        if (x_a == 0 && y_sign) {
+        if (x == 0.0 && y_sign) {
           // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
           fputil::set_errno_if_required(EDOM);
           fputil::raise_except_if_required(FE_DIVBYZERO);
@@ -298,7 +300,7 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
 
     // TODO: Speed things up with pow(2, y) = exp2(y) and pow(10, y) = exp10(y).
 
-    if (x_a == 0) {
+    if (x == 0.0) {
       bool out_is_neg = x_sign && is_odd_integer(y);
       if (y_sign) {
         // pow(0, negative number) = inf
diff --git a/libc/src/math/generic/powf.cpp b/libc/src/math/generic/powf.cpp
index 83477c6ef2aceba..c84ce0da34b10a3 100644
--- a/libc/src/math/generic/powf.cpp
+++ b/libc/src/math/generic/powf.cpp
@@ -529,10 +529,10 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
   // Hence x^y will either overflow or underflow if x is not zero.
   if (LIBC_UNLIKELY((y_abs & 0x0007'ffff) == 0) || (y_abs > 0x4f170000)) {
     // Exceptional exponents.
-    switch (y_abs) {
-    case 0x0000'0000: { // y = +-0.0f
+    if (y == 0.0f)
       return 1.0f;
-    }
+
+    switch (y_abs) {
     case 0x7f80'0000: { // y = +-Inf
       if (x_abs > 0x7f80'0000) {
         // pow(NaN, +-Inf) = NaN
@@ -542,7 +542,7 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
         // pow(+-1, +-Inf) = 1.0f
         return 1.0f;
       }
-      if (x_abs == 0 && y_u == 0xff80'0000) {
+      if (x == 0.0f && y_u == 0xff80'0000) {
         // pow(+-0, -Inf) = +inf and raise FE_DIVBYZERO
         fputil::set_errno_if_required(EDOM);
         fputil::raise_except_if_required(FE_DIVBYZERO);
@@ -561,12 +561,15 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
       switch (y_u) {
       case 0x3f00'0000: // y = 0.5f
         // pow(x, 1/2) = sqrt(x)
-        if (LIBC_UNLIKELY(x_u == 0x8000'0000 || x_u == 0xff80'0000)) {
+        if (LIBC_UNLIKELY(x == 0.0f || x_u == 0xff80'0000)) {
           // pow(-0, 1/2) = +0
           // pow(-inf, 1/2) = +inf
-          return FloatBits(x_abs).get_val();
+          // Make sure it is correct for FTZ/DAZ.
+          return x * x;
         }
-        return fputil::sqrt<float>(x);
+        float r;
+        r = fputil::sqrt<float>(x);
+        return (FloatBits(r).uintval() != 0x8000'0000) ? r : 0.0f;
       case 0x3f80'0000: // y = 1.0f
         return x;
       case 0x4000'0000: // y = 2.0f
@@ -634,8 +637,7 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
 
     const bool x_is_neg = x_u >= FloatBits::SIGN_MASK;
 
-    switch (x_abs) {
-    case 0x0000'0000: { // x = +-0.0f
+    if (x == 0.0f) {
       const bool out_is_neg =
           x_is_neg && is_odd_integer(FloatBits(y_u).get_val());
       if (y_u > 0x8000'0000U) {
@@ -647,7 +649,9 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
       // pow(0, positive number) = 0
       return out_is_neg ? -0.0f : 0.0f;
     }
-    case 0x7f80'0000: { // x = +-Inf
+
+    if (x_abs == 0x7f80'0000) {
+      // x = +-Inf
       const bool out_is_neg =
           x_is_neg && is_odd_integer(FloatBits(y_u).get_val());
       if (y_u >= FloatBits::SIGN_MASK) {
@@ -655,7 +659,6 @@ LLVM_LIBC_FUNCTION(float, powf, (float x, float y)) {
       }
       return FloatBits::inf(out_is_neg ? Sign::NEG : Sign::POS).get_val();
     }
-    }
 
     if (x_abs > 0x7f80'0000) {
       // x is NaN.
diff --git a/libc/src/math/generic/sin.cpp b/libc/src/math/generic/sin.cpp
index 2e1d3ffd5f37d80..b32486dff487cad 100644
--- a/libc/src/math/generic/sin.cpp
+++ b/libc/src/math/generic/sin.cpp
@@ -50,7 +50,7 @@ LLVM_LIBC_FUNCTION(double, sin, (double x)) {
       if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 26)) {
         // Signed zeros.
         if (LIBC_UNLIKELY(x == 0.0))
-          return x;
+          return x + x; // Make sure it works with FTZ/DAZ.
 
 #ifdef LIBC_TARGET_CPU_HAS_FMA
         return fputil::multiply_add(x, -0x1.0p-54, x);
diff --git a/libc/src/math/generic/sincosf16_utils.h b/libc/src/math/generic/sincosf16_utils.h
new file mode 100644
index 000000000000000..83511755a56c42d
--- /dev/null
+++ b/libc/src/math/generic/sincosf16_utils.h
@@ -0,0 +1,77 @@
+//===-- Collection of utils for sinf16/cosf16 -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H
+#define LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Lookup table for sin(k * pi / 32) with k = 0, ..., 63.
+// Table is generated with Sollya as follows:
+// > display = hexadecimal;
+// > for k from 0 to 63 do { round(sin(k * pi/32), SG, RN); };
+constexpr float SIN_K_PI_OVER_32[64] = {
+    0x0.0p0,        0x1.917a6cp-4,  0x1.8f8b84p-3,  0x1.294062p-2,
+    0x1.87de2ap-2,  0x1.e2b5d4p-2,  0x1.1c73b4p-1,  0x1.44cf32p-1,
+    0x1.6a09e6p-1,  0x1.8bc806p-1,  0x1.a9b662p-1,  0x1.c38b3p-1,
+    0x1.d906bcp-1,  0x1.e9f416p-1,  0x1.f6297cp-1,  0x1.fd88dap-1,
+    0x1p0,          0x1.fd88dap-1,  0x1.f6297cp-1,  0x1.e9f416p-1,
+    0x1.d906bcp-1,  0x1.c38b3p-1,   0x1.a9b662p-1,  0x1.8bc806p-1,
+    0x1.6a09e6p-1,  0x1.44cf32p-1,  0x1.1c73b4p-1,  0x1.e2b5d4p-2,
+    0x1.87de2ap-2,  0x1.294062p-2,  0x1.8f8b84p-3,  0x1.917a6cp-4,
+    0x0.0p0,        -0x1.917a6cp-4, -0x1.8f8b84p-3, -0x1.294062p-2,
+    -0x1.87de2ap-2, -0x1.e2b5d4p-2, -0x1.1c73b4p-1, -0x1.44cf32p-1,
+    -0x1.6a09e6p-1, -0x1.8bc806p-1, -0x1.a9b662p-1, -0x1.c38b3p-1,
+    -0x1.d906bcp-1, -0x1.e9f416p-1, -0x1.f6297cp-1, -0x1.fd88dap-1,
+    -0x1p0,         -0x1.fd88dap-1, -0x1.f6297cp-1, -0x1.e9f416p-1,
+    -0x1.d906bcp-1, -0x1.c38b3p-1,  -0x1.a9b662p-1, -0x1.8bc806p-1,
+    -0x1.6a09e6p-1, -0x1.44cf32p-1, -0x1.1c73b4p-1, -0x1.e2b5d4p-2,
+    -0x1.87de2ap-2, -0x1.294062p-2, -0x1.8f8b84p-3, -0x1.917a6cp-4};
+
+LIBC_INLINE int32_t range_reduction_sincospif16(float x, float &y) {
+  const float k_rounded = fputil::nearest_integer(x * 32);
+  // y = x * 32 - k: the part of x * 32 left over after removing the integer k.
+  y = fputil::multiply_add<float>(x, 32.0, -k_rounded);
+  return static_cast<int32_t>(k_rounded);
+}
+
+LIBC_INLINE void sincospif16_eval(float xf, float &sin_k, float &cos_k,
+                                  float &sin_y, float &cosm1_y) {
+  float frac;
+  const int32_t idx = range_reduction_sincospif16(xf, frac);
+
+  sin_k = SIN_K_PI_OVER_32[idx & 63];
+  cos_k = SIN_K_PI_OVER_32[(idx + 16) & 63];
+
+  // After range reduction, -0.5 <= frac <= 0.5. For very small values of frac,
+  // evaluating sin(frac * pi/32) directly can be inaccurate; approximating
+  // sin(frac * pi/32)/frac instead significantly reduces the relative errors.
+  const float frac_sq = frac * frac;
+
+  // Degree-6 minimax even polynomial for sin(y*pi/32)/y generated by Sollya
+  // with:
+  // > Q = fpminimax(sin(y * pi/32)/y, [|0, 2, 4, 6|], [|SG...|], [0, 0.5]);
+  sin_y = frac * fputil::polyeval(frac_sq, 0x1.921fb6p-4f, -0x1.4aeabcp-13f,
+                                  0x1.a03354p-21f, -0x1.ad02d2p-20f);
+
+  // Degree-6 minimax even polynomial for cos(y*pi/32) generated by Sollya
+  // with (the constant term 1 is dropped, so the result is cos(y*pi/32) - 1):
+  // > P = fpminimax(cos(y * pi/32), [|0, 2, 4, 6|],[|1, SG...|], [0, 0.5]);
+  cosm1_y = frac_sq * fputil::polyeval(frac_sq, -0x1.3bd3ccp-8f,
+                                       0x1.03a61ap-18f, 0x1.a6f7a2p-29f);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_MATH_GENERIC_SINCOSF16_UTILS_H
diff --git a/libc/src/math/generic/sinpif16.cpp b/libc/src/math/generic/sinpif16.cpp
index 17cca583e0c0ec7..51ea595653b4da7 100644
--- a/libc/src/math/generic/sinpif16.cpp
+++ b/libc/src/math/generic/sinpif16.cpp
@@ -7,52 +7,23 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/math/sinpif16.h"
+#include "hdr/errno_macros.h"
+#include "hdr/fenv_macros.h"
+#include "sincosf16_utils.h"
 #include "src/__support/FPUtil/FEnvImpl.h"
 #include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/PolyEval.h"
 #include "src/__support/FPUtil/cast.h"
 #include "src/__support/FPUtil/multiply_add.h"
-#include "src/__support/FPUtil/nearest_integer.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-// Lookup table for sin(k * pi / 32) with k = 0, ..., 63.
-// Table is generated with Sollya as follows:
-// > display = hexadecimmal;
-// > for k from 0 to 63 do { round(sin(k * pi/32), SG, RN); };
-static constexpr float SIN_K_PI_OVER_32[64] = {
-    0x0.0p0,        0x1.917a6cp-4,  0x1.8f8b84p-3,  0x1.294062p-2,
-    0x1.87de2ap-2,  0x1.e2b5d4p-2,  0x1.1c73b4p-1,  0x1.44cf32p-1,
-    0x1.6a09e6p-1,  0x1.8bc806p-1,  0x1.a9b662p-1,  0x1.c38b3p-1,
-    0x1.d906bcp-1,  0x1.e9f416p-1,  0x1.f6297cp-1,  0x1.fd88dap-1,
-    0x1p0,          0x1.fd88dap-1,  0x1.f6297cp-1,  0x1.e9f416p-1,
-    0x1.d906bcp-1,  0x1.c38b3p-1,   0x1.a9b662p-1,  0x1.8bc806p-1,
-    0x1.6a09e6p-1,  0x1.44cf32p-1,  0x1.1c73b4p-1,  0x1.e2b5d4p-2,
-    0x1.87de2ap-2,  0x1.294062p-2,  0x1.8f8b84p-3,  0x1.917a6cp-4,
-    0x0.0p0,        -0x1.917a6cp-4, -0x1.8f8b84p-3, -0x1.294062p-2,
-    -0x1.87de2ap-2, -0x1.e2b5d4p-2, -0x1.1c73b4p-1, -0x1.44cf32p-1,
-    -0x1.6a09e6p-1, -0x1.8bc806p-1, -0x1.a9b662p-1, -0x1.c38b3p-1,
-    -0x1.d906bcp-1, -0x1.e9f416p-1, -0x1.f6297ep-1, -0x1.fd88dap-1,
-    -0x1p0,         -0x1.fd88dap-1, -0x1.f6297cp-1, -0x1.e9f416p-1,
-    -0x1.d906bcp-1, -0x1.c38b3p-1,  -0x1.a9b662p-1, -0x1.8bc806p-1,
-    -0x1.6a09e6p-1, -0x1.44cf32p-1, -0x1.1c73b4p-1, -0x1.e2b5d4p-2,
-    -0x1.87de2ap-2, -0x1.294062p-2, -0x1.8f8b84p-3, -0x1.917a6cp-4};
-
-static LIBC_INLINE int32_t range_reduction(float x, float &y) {
-  float kf = fputil::nearest_integer(x * 32);
-  y = fputil::multiply_add<float>(x, 32.0, -kf);
-
-  return static_cast<int32_t>(kf);
-}
-
 LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
   using FPBits = typename fputil::FPBits<float16>;
   FPBits xbits(x);
 
   uint16_t x_u = xbits.uintval();
   uint16_t x_abs = x_u & 0x7fff;
+  float xf = x;
 
   // Range reduction:
   // For |x| > 1/32, we perform range reduction as follows:
@@ -68,12 +39,8 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
   // Once k and y are computed, we then deduce the answer by the sine of sum
   // formula:
   //   sin(x * pi) = sin((k + y) * pi/32)
-  //           = sin(k * pi/32) * cos(y * pi/32) + sin (y * pi/32) * cos (k *
-  //           pi/32)
-  // The values of sin(k * pi/32) and cos (k * pi/32) for k = 0...63 are
-  // precomputed and stored using a vector of 64 single precision floats. sin(y
-  // * pi/32) and cos(y * pi/32) are computed using degree-9 chebyshev
-  // polynomials generated by Sollya.
+  //               = sin(k * pi/32) * cos(y * pi/32) +
+  //                 sin(y * pi/32) * cos(k * pi/32)
 
   // For signed zeros
   if (LIBC_UNLIKELY(x_abs == 0U))
@@ -94,36 +61,8 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
     return FPBits::zero(xbits.sign()).get_val();
   }
 
-  float f32 = x;
-  float y;
-  int32_t k = range_reduction(f32, y);
-
-  float sin_k = SIN_K_PI_OVER_32[k & 63];
-  float cos_k = SIN_K_PI_OVER_32[(k + 16) & 63];
-
-  // Recall;
-  // sin(x * pi/32) = sin((k + y) * pi/32)
-  // 		    = sin(y * pi/32) * cos(k * pi/32) + cos(y * pi/32) * sin(k *
-  // pi/32) Recall, after range reduction, -0.5 <= y <= 0.5. For very small
-  // values of y, calculating sin(y * p/32) can be inaccurate. Generating a
-  // polynomial for sin(y * p/32)/y instead significantly reduces the relative
-  // errors.
-  float ysq = y * y;
-
-  // Degree-6 minimax even polynomial for sin(y*pi/32)/y generated by Sollya
-  // with: > Q = fpminimax(sin(y*pi/32)/y, [|0, 2, 4, 6|], [|SG...|], [0, 0.5]);
-  float sin_y = y * fputil::polyeval(ysq, 0x1.921fb6p-4f, -0x1.4aeabcp-13f,
-                                     0x1.a03354p-21f, -0x1.ad02d2p-20f);
-
-  // Note that cosm1_y = cos(y*pi/32) - 1 = cos_y - 1
-  // Derivation:
-  // sin(x * pi) = sin((k + y) * pi/32)
-  //             = sin_y * cos_k + cos_y * sin_k
-  //             = cos_k * sin_y + sin_k * (1 + cos_y - 1)
-  // Degree-6 minimax even polynomial for cos(y*pi/32) generated by Sollya with:
-  // > P = fpminimax(cos(y*pi/32), [|0, 2, 4, 6|],[|1, SG...|], [0, 0.5]);
-  float cosm1_y = ysq * fputil::polyeval(ysq, -0x1.3bd3ccp-8f, 0x1.03a61ap-18f,
-                                         0x1.a6f7a2p-29f);
+  float sin_k, cos_k, sin_y, cosm1_y;
+  sincospif16_eval(xf, sin_k, cos_k, sin_y, cosm1_y);
 
   if (LIBC_UNLIKELY(sin_y == 0 && sin_k == 0))
     return FPBits::zero(xbits.sign()).get_val();
@@ -133,4 +72,5 @@ LLVM_LIBC_FUNCTION(float16, sinpif16, (float16 x)) {
   return fputil::cast<float16>(fputil::multiply_add(
       sin_y, cos_k, fputil::multiply_add(cosm1_y, sin_k, sin_k)));
 }
+
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/tan.cpp b/libc/src/math/generic/tan.cpp
index f9be25ed866e1d0..19d31a8441efb68 100644
--- a/libc/src/math/generic/tan.cpp
+++ b/libc/src/math/generic/tan.cpp
@@ -138,7 +138,7 @@ LLVM_LIBC_FUNCTION(double, tan, (double x)) {
       if (LIBC_UNLIKELY(x_e < FPBits::EXP_BIAS - 27)) {
         // Signed zeros.
         if (LIBC_UNLIKELY(x == 0.0))
-          return x;
+          return x + x; // Make sure it works with FTZ/DAZ.
 
 #ifdef LIBC_TARGET_CPU_HAS_FMA
         return fputil::multiply_add(x, 0x1.0p-54, x);
diff --git a/libc/src/spawn/linux/CMakeLists.txt b/libc/src/spawn/linux/CMakeLists.txt
index 9ef3a9d18b0c690..26148fe1c76dbdc 100644
--- a/libc/src/spawn/linux/CMakeLists.txt
+++ b/libc/src/spawn/linux/CMakeLists.txt
@@ -5,7 +5,8 @@ add_entrypoint_object(
   HDRS
     ../posix_spawn.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.types.mode_t
+    libc.hdr.fcntl_macros
     libc.include.spawn
     libc.include.sys_syscall
     libc.include.signal
diff --git a/libc/src/spawn/linux/posix_spawn.cpp b/libc/src/spawn/linux/posix_spawn.cpp
index 4c0469b3ce384a2..fe82ba260148a61 100644
--- a/libc/src/spawn/linux/posix_spawn.cpp
+++ b/libc/src/spawn/linux/posix_spawn.cpp
@@ -14,7 +14,8 @@
 #include "src/__support/macros/config.h"
 #include "src/spawn/file_actions.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
+#include "hdr/types/mode_t.h"
 #include <signal.h> // For SIGCHLD
 #include <spawn.h>
 #include <sys/syscall.h> // For syscall numbers.
diff --git a/libc/src/stdio/gpu/fprintf.cpp b/libc/src/stdio/gpu/fprintf.cpp
index 6222589cc4bab96..46196d7d2b10f55 100644
--- a/libc/src/stdio/gpu/fprintf.cpp
+++ b/libc/src/stdio/gpu/fprintf.cpp
@@ -16,7 +16,7 @@
 
 #include <stdarg.h>
 
-namespace LIBC_NAMESPACE {
+namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(int, fprintf,
                    (::FILE *__restrict stream, const char *__restrict format,
@@ -29,4 +29,4 @@ LLVM_LIBC_FUNCTION(int, fprintf,
   return ret_val;
 }
 
-} // namespace LIBC_NAMESPACE
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/gpu/printf.cpp b/libc/src/stdio/gpu/printf.cpp
index d9903193ef1658b..be1885fd6801d07 100644
--- a/libc/src/stdio/gpu/printf.cpp
+++ b/libc/src/stdio/gpu/printf.cpp
@@ -15,7 +15,7 @@
 
 #include <stdarg.h>
 
-namespace LIBC_NAMESPACE {
+namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) {
   va_list vlist;
@@ -26,4 +26,4 @@ LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) {
   return ret_val;
 }
 
-} // namespace LIBC_NAMESPACE
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/gpu/vfprintf.cpp b/libc/src/stdio/gpu/vfprintf.cpp
index 961cfa48579e0af..c92685f48c728ba 100644
--- a/libc/src/stdio/gpu/vfprintf.cpp
+++ b/libc/src/stdio/gpu/vfprintf.cpp
@@ -14,7 +14,7 @@
 #include "src/errno/libc_errno.h"
 #include "src/stdio/gpu/vfprintf_utils.h"
 
-namespace LIBC_NAMESPACE {
+namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(int, vfprintf,
                    (::FILE *__restrict stream, const char *__restrict format,
@@ -24,4 +24,4 @@ LLVM_LIBC_FUNCTION(int, vfprintf,
   return ret_val;
 }
 
-} // namespace LIBC_NAMESPACE
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/gpu/vfprintf_utils.h b/libc/src/stdio/gpu/vfprintf_utils.h
index 93ce1649869fc10..5010ee16d96074a 100644
--- a/libc/src/stdio/gpu/vfprintf_utils.h
+++ b/libc/src/stdio/gpu/vfprintf_utils.h
@@ -9,10 +9,11 @@
 #include "hdr/types/FILE.h"
 #include "src/__support/RPC/rpc_client.h"
 #include "src/__support/arg_list.h"
+#include "src/__support/macros/config.h"
 #include "src/stdio/gpu/file.h"
 #include "src/string/string_utils.h"
 
-namespace LIBC_NAMESPACE {
+namespace LIBC_NAMESPACE_DECL {
 
 template <uint16_t opcode>
 LIBC_INLINE int vfprintf_impl(::FILE *__restrict file,
@@ -82,4 +83,4 @@ LIBC_INLINE int vfprintf_internal(::FILE *__restrict stream,
 #endif
 }
 
-} // namespace LIBC_NAMESPACE
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/gpu/vprintf.cpp b/libc/src/stdio/gpu/vprintf.cpp
index 2bb74d7f017b594..54012f3071844d5 100644
--- a/libc/src/stdio/gpu/vprintf.cpp
+++ b/libc/src/stdio/gpu/vprintf.cpp
@@ -13,7 +13,7 @@
 #include "src/errno/libc_errno.h"
 #include "src/stdio/gpu/vfprintf_utils.h"
 
-namespace LIBC_NAMESPACE {
+namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(int, vprintf,
                    (const char *__restrict format, va_list vlist)) {
@@ -22,4 +22,4 @@ LLVM_LIBC_FUNCTION(int, vprintf,
   return ret_val;
 }
 
-} // namespace LIBC_NAMESPACE
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdio/linux/CMakeLists.txt b/libc/src/stdio/linux/CMakeLists.txt
index d6241e1ca0439d8..1b2fcb33ce54d7f 100644
--- a/libc/src/stdio/linux/CMakeLists.txt
+++ b/libc/src/stdio/linux/CMakeLists.txt
@@ -5,7 +5,7 @@ add_entrypoint_object(
   HDRS
     ../remove.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -22,6 +22,7 @@ add_entrypoint_object(
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
     libc.src.errno.errno
+    libc.hdr.fcntl_macros
 )
 
 add_entrypoint_object(
diff --git a/libc/src/stdio/linux/remove.cpp b/libc/src/stdio/linux/remove.cpp
index 9e299aaf43e450b..dbb4491d0e6cc1c 100644
--- a/libc/src/stdio/linux/remove.cpp
+++ b/libc/src/stdio/linux/remove.cpp
@@ -11,9 +11,9 @@
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
+#include "hdr/fcntl_macros.h" // For AT_* macros.
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
-#include <fcntl.h>       // For AT_* macros.
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/stdio/linux/rename.cpp b/libc/src/stdio/linux/rename.cpp
index 69fd22720ed1957..fbcb29be48f4e24 100644
--- a/libc/src/stdio/linux/rename.cpp
+++ b/libc/src/stdio/linux/rename.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/stdio/rename.h"
-#include "include/llvm-libc-macros/linux/fcntl-macros.h"
+#include "hdr/fcntl_macros.h"
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
diff --git a/libc/src/stdio/vsscanf.h b/libc/src/stdio/vsscanf.h
index 992c44d3d95b9a7..c57b1743e477e1b 100644
--- a/libc/src/stdio/vsscanf.h
+++ b/libc/src/stdio/vsscanf.h
@@ -9,12 +9,14 @@
 #ifndef LLVM_LIBC_SRC_STDIO_VSSCANF_H
 #define LLVM_LIBC_SRC_STDIO_VSSCANF_H
 
+#include "src/__support/macros/config.h"
+
 #include <stdarg.h>
 
-namespace LIBC_NAMESPACE {
+namespace LIBC_NAMESPACE_DECL {
 
 int vsscanf(const char *s, const char *format, va_list vlist);
 
-} // namespace LIBC_NAMESPACE
+} // namespace LIBC_NAMESPACE_DECL
 
 #endif // LLVM_LIBC_SRC_STDIO_VSSCANF_H
diff --git a/libc/src/sys/mman/linux/CMakeLists.txt b/libc/src/sys/mman/linux/CMakeLists.txt
index 11188254cfbd459..47c16f79bc8d580 100644
--- a/libc/src/sys/mman/linux/CMakeLists.txt
+++ b/libc/src/sys/mman/linux/CMakeLists.txt
@@ -187,8 +187,7 @@ add_entrypoint_object(
     ../shm_open.h
   DEPENDS
     libc.src.fcntl.open
-    libc.include.llvm-libc-macros.fcntl_macros
-    libc.include.llvm-libc-types.mode_t
+    libc.hdr.types.mode_t
     .shm_common
 )
 
diff --git a/libc/src/sys/mman/linux/shm_open.cpp b/libc/src/sys/mman/linux/shm_open.cpp
index d235e57aefdeb13..11de482272d00a6 100644
--- a/libc/src/sys/mman/linux/shm_open.cpp
+++ b/libc/src/sys/mman/linux/shm_open.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/sys/mman/shm_open.h"
-#include "llvm-libc-macros/fcntl-macros.h"
+#include "hdr/types/mode_t.h"
 #include "src/__support/macros/config.h"
 #include "src/fcntl/open.h"
 #include "src/sys/mman/linux/shm_common.h"
diff --git a/libc/src/sys/mman/shm_open.h b/libc/src/sys/mman/shm_open.h
index c890304aa4acf9b..1872dd30cb6f5e3 100644
--- a/libc/src/sys/mman/shm_open.h
+++ b/libc/src/sys/mman/shm_open.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_LIBC_SRC_SYS_MMAN_SHM_OPEN_H
 #define LLVM_LIBC_SRC_SYS_MMAN_SHM_OPEN_H
 
+#include "hdr/types/mode_t.h"
 #include "src/__support/macros/config.h"
-#include <llvm-libc-types/mode_t.h>
 
 namespace LIBC_NAMESPACE_DECL {
 
diff --git a/libc/src/sys/stat/linux/CMakeLists.txt b/libc/src/sys/stat/linux/CMakeLists.txt
index 415d2fa5c87715d..9aeb14636c2c1a5 100644
--- a/libc/src/sys/stat/linux/CMakeLists.txt
+++ b/libc/src/sys/stat/linux/CMakeLists.txt
@@ -5,7 +5,8 @@ add_entrypoint_object(
   HDRS
     ../chmod.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.types.mode_t
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -19,6 +20,7 @@ add_entrypoint_object(
   HDRS
     ../fchmod.h
   DEPENDS
+    libc.hdr.types.mode_t
     libc.include.sys_stat
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -45,7 +47,8 @@ add_entrypoint_object(
   HDRS
     ../mkdir.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.types.mode_t
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -84,7 +87,7 @@ add_entrypoint_object(
     ../stat.h
   DEPENDS
     .kernel_statx
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.src.errno.errno
 )
@@ -97,7 +100,7 @@ add_entrypoint_object(
     ../lstat.h
   DEPENDS
     .kernel_statx
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.src.errno.errno
 )
@@ -110,7 +113,7 @@ add_entrypoint_object(
     ../fstat.h
   DEPENDS
     .kernel_statx
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.src.errno.errno
 )
diff --git a/libc/src/sys/stat/linux/chmod.cpp b/libc/src/sys/stat/linux/chmod.cpp
index c91cabb514a8c9d..57d5bae6b81915c 100644
--- a/libc/src/sys/stat/linux/chmod.cpp
+++ b/libc/src/sys/stat/linux/chmod.cpp
@@ -11,9 +11,10 @@
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
+#include "hdr/fcntl_macros.h"
+#include "hdr/types/mode_t.h"
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
-#include <fcntl.h>
 #include <sys/stat.h>
 #include <sys/syscall.h> // For syscall numbers.
 
diff --git a/libc/src/sys/stat/linux/fchmod.cpp b/libc/src/sys/stat/linux/fchmod.cpp
index 7b6c7b7091a8239..0d6fd359169aaff 100644
--- a/libc/src/sys/stat/linux/fchmod.cpp
+++ b/libc/src/sys/stat/linux/fchmod.cpp
@@ -11,9 +11,9 @@
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
+#include "hdr/types/mode_t.h"
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
-#include <fcntl.h>
 #include <sys/stat.h>
 #include <sys/syscall.h> // For syscall numbers.
 
diff --git a/libc/src/sys/stat/linux/fstat.cpp b/libc/src/sys/stat/linux/fstat.cpp
index 411aa47bcda2ad3..35cf8f08f782d25 100644
--- a/libc/src/sys/stat/linux/fstat.cpp
+++ b/libc/src/sys/stat/linux/fstat.cpp
@@ -13,7 +13,7 @@
 
 #include "src/__support/common.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/stat.h>
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/sys/stat/linux/lstat.cpp b/libc/src/sys/stat/linux/lstat.cpp
index 5a6eff068d1dd79..354c5b6e029a44b 100644
--- a/libc/src/sys/stat/linux/lstat.cpp
+++ b/libc/src/sys/stat/linux/lstat.cpp
@@ -14,7 +14,7 @@
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/stat.h>
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/sys/stat/linux/mkdir.cpp b/libc/src/sys/stat/linux/mkdir.cpp
index 527c3d2058d2b72..b319b5c8393de75 100644
--- a/libc/src/sys/stat/linux/mkdir.cpp
+++ b/libc/src/sys/stat/linux/mkdir.cpp
@@ -11,9 +11,10 @@
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
+#include "hdr/fcntl_macros.h"
+#include "hdr/types/mode_t.h"
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
-#include <fcntl.h>
 #include <sys/stat.h>
 #include <sys/syscall.h> // For syscall numbers.
 
diff --git a/libc/src/sys/stat/linux/stat.cpp b/libc/src/sys/stat/linux/stat.cpp
index c5149e6e3c88393..de9cdb197d687ce 100644
--- a/libc/src/sys/stat/linux/stat.cpp
+++ b/libc/src/sys/stat/linux/stat.cpp
@@ -13,7 +13,7 @@
 
 #include "src/__support/common.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/stat.h>
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt
index 9b0d752cefbd8e5..472438ca72e49e4 100644
--- a/libc/src/unistd/linux/CMakeLists.txt
+++ b/libc/src/unistd/linux/CMakeLists.txt
@@ -5,6 +5,7 @@ add_entrypoint_object(
   HDRS
     ../access.h
   DEPENDS
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -57,7 +58,7 @@ add_entrypoint_object(
   HDRS
     ../dup2.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -254,7 +255,7 @@ add_entrypoint_object(
   HDRS
     ../link.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -268,7 +269,7 @@ add_entrypoint_object(
   HDRS
     ../linkat.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -377,7 +378,7 @@ add_entrypoint_object(
   HDRS
     ../rmdir.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -391,7 +392,7 @@ add_entrypoint_object(
   HDRS
     ../readlink.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -405,7 +406,7 @@ add_entrypoint_object(
   HDRS
     ../readlinkat.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -419,7 +420,7 @@ add_entrypoint_object(
   HDRS
     ../symlink.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -433,7 +434,7 @@ add_entrypoint_object(
   HDRS
     ../symlinkat.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -485,7 +486,7 @@ add_entrypoint_object(
   HDRS
     ../unlink.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
@@ -499,7 +500,7 @@ add_entrypoint_object(
   HDRS
     ../unlinkat.h
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.include.sys_syscall
     libc.src.__support.OSUtil.osutil
diff --git a/libc/src/unistd/linux/access.cpp b/libc/src/unistd/linux/access.cpp
index e9ad74989b05636..2f7ebbcdf9e810d 100644
--- a/libc/src/unistd/linux/access.cpp
+++ b/libc/src/unistd/linux/access.cpp
@@ -11,9 +11,9 @@
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
+#include "hdr/fcntl_macros.h"
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
-#include <fcntl.h>
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/unistd/linux/dup2.cpp b/libc/src/unistd/linux/dup2.cpp
index 51a19a71a7d854c..c7c7c1a8ca786f3 100644
--- a/libc/src/unistd/linux/dup2.cpp
+++ b/libc/src/unistd/linux/dup2.cpp
@@ -11,9 +11,9 @@
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
+#include "hdr/fcntl_macros.h"
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
-#include <fcntl.h>
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/unistd/linux/link.cpp b/libc/src/unistd/linux/link.cpp
index 37ca58eab1096dc..477806a70df7427 100644
--- a/libc/src/unistd/linux/link.cpp
+++ b/libc/src/unistd/linux/link.cpp
@@ -13,7 +13,7 @@
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/unistd/linux/linkat.cpp b/libc/src/unistd/linux/linkat.cpp
index fcd6a5f75a196b4..40f68cc90c4809a 100644
--- a/libc/src/unistd/linux/linkat.cpp
+++ b/libc/src/unistd/linux/linkat.cpp
@@ -11,9 +11,9 @@
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
+#include "hdr/fcntl_macros.h"
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
-#include <fcntl.h>
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/unistd/linux/readlink.cpp b/libc/src/unistd/linux/readlink.cpp
index 7b152450044054a..2055e6b3400f2f5 100644
--- a/libc/src/unistd/linux/readlink.cpp
+++ b/libc/src/unistd/linux/readlink.cpp
@@ -11,9 +11,9 @@
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
+#include "hdr/fcntl_macros.h"
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
-#include <fcntl.h>
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/unistd/linux/readlinkat.cpp b/libc/src/unistd/linux/readlinkat.cpp
index 19a9ff9fbeb72ab..e5e4d0d39bc9cf5 100644
--- a/libc/src/unistd/linux/readlinkat.cpp
+++ b/libc/src/unistd/linux/readlinkat.cpp
@@ -11,9 +11,9 @@
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
+#include "hdr/fcntl_macros.h"
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
-#include <fcntl.h>
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/unistd/linux/rmdir.cpp b/libc/src/unistd/linux/rmdir.cpp
index 8974468ebcf16a3..075af12af64c5c4 100644
--- a/libc/src/unistd/linux/rmdir.cpp
+++ b/libc/src/unistd/linux/rmdir.cpp
@@ -13,7 +13,7 @@
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/unistd/linux/symlink.cpp b/libc/src/unistd/linux/symlink.cpp
index 5efd4df85edabde..9e1b2886ea0f5f0 100644
--- a/libc/src/unistd/linux/symlink.cpp
+++ b/libc/src/unistd/linux/symlink.cpp
@@ -13,7 +13,7 @@
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/unistd/linux/symlinkat.cpp b/libc/src/unistd/linux/symlinkat.cpp
index 63d2e6d1507a573..bcf2d0f8cc0551b 100644
--- a/libc/src/unistd/linux/symlinkat.cpp
+++ b/libc/src/unistd/linux/symlinkat.cpp
@@ -11,9 +11,9 @@
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
+#include "hdr/fcntl_macros.h"
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
-#include <fcntl.h>
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/unistd/linux/unlink.cpp b/libc/src/unistd/linux/unlink.cpp
index de7cae8b826ebc8..72d8e2398e3d761 100644
--- a/libc/src/unistd/linux/unlink.cpp
+++ b/libc/src/unistd/linux/unlink.cpp
@@ -11,9 +11,9 @@
 #include "src/__support/OSUtil/syscall.h" // For internal syscall function.
 #include "src/__support/common.h"
 
+#include "hdr/fcntl_macros.h"
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
-#include <fcntl.h>
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/unistd/linux/unlinkat.cpp b/libc/src/unistd/linux/unlinkat.cpp
index e794f242b9459ad..4ed20f542f17023 100644
--- a/libc/src/unistd/linux/unlinkat.cpp
+++ b/libc/src/unistd/linux/unlinkat.cpp
@@ -13,7 +13,7 @@
 #include "src/__support/macros/config.h"
 #include "src/errno/libc_errno.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/syscall.h> // For syscall numbers.
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/test/src/fcntl/CMakeLists.txt b/libc/test/src/fcntl/CMakeLists.txt
index 48048b7fe88666d..b522fef7439df74 100644
--- a/libc/test/src/fcntl/CMakeLists.txt
+++ b/libc/test/src/fcntl/CMakeLists.txt
@@ -42,7 +42,7 @@ add_libc_unittest(
   SRCS
     openat_test.cpp
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.src.errno.errno
     libc.src.fcntl.open
     libc.src.fcntl.openat
diff --git a/libc/test/src/fcntl/openat_test.cpp b/libc/test/src/fcntl/openat_test.cpp
index 9dafd125224a406..547359eb9f7a9df 100644
--- a/libc/test/src/fcntl/openat_test.cpp
+++ b/libc/test/src/fcntl/openat_test.cpp
@@ -14,7 +14,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 
 TEST(LlvmLibcUniStd, OpenAndReadTest) {
   using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds;
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 262c717dd27d558..b46ef4028915ba4 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -45,6 +45,17 @@ add_fp_unittest(
 )
 
 
+add_fp_unittest(
+  cospif16_test
+  NEED_MPFR # cospif16_test.cpp checks results with EXPECT_MPFR_MATCH_ALL_ROUNDING
+  SUITE
+    libc-math-unittests
+  SRCS
+    cospif16_test.cpp
+  DEPENDS
+    libc.src.math.cospif16
+)
+
 add_fp_unittest(
   daddl_test
   NEED_MPFR
diff --git a/libc/test/src/math/cbrt_test.cpp b/libc/test/src/math/cbrt_test.cpp
index 2ef2140966f52c5..2e2de16fc859d12 100644
--- a/libc/test/src/math/cbrt_test.cpp
+++ b/libc/test/src/math/cbrt_test.cpp
@@ -87,12 +87,13 @@ TEST_F(LlvmLibcCbrtTest, InDoubleRange) {
 
 TEST_F(LlvmLibcCbrtTest, SpecialValues) {
   constexpr double INPUTS[] = {
-      0x1.4f61672324c8p-1028, 0x1.00152f57068b7p-1, 0x1.006509cda9886p-1,
-      0x1.018369b92e523p-1,   0x1.10af932ef2bf9p-1, 0x1.1a41117939fdbp-1,
-      0x1.2ae8076520d9ap-1,   0x1.a202bfc89ddffp-1, 0x1.a6bb8c803147bp-1,
-      0x1.000197b499b1bp+0,   0x1.00065ed266c6cp+0, 0x1.d4306c202c4c2p+0,
-      0x1.8fd409efe4851p+1,   0x1.95fd0eb31cc4p+1,  0x1.7cef1d276e335p+2,
-      0x1.94910c4fc98p+2,     0x1.a0cc1327bb4c4p+2, 0x1.e7d6ebed549c4p+2,
+      0x1.4f61672324c8p-1028, -0x1.fffffffffffffp-1021, 0x1.00152f57068b7p-1,
+      0x1.006509cda9886p-1,   0x1.018369b92e523p-1,     0x1.10af932ef2bf9p-1,
+      0x1.1a41117939fdbp-1,   0x1.2ae8076520d9ap-1,     0x1.a202bfc89ddffp-1,
+      0x1.a6bb8c803147bp-1,   0x1.000197b499b1bp+0,     0x1.00065ed266c6cp+0,
+      0x1.d4306c202c4c2p+0,   0x1.8fd409efe4851p+1,     0x1.95fd0eb31cc4p+1,
+      0x1.7cef1d276e335p+2,   0x1.94910c4fc98p+2,       0x1.a0cc1327bb4c4p+2,
+      0x1.e7d6ebed549c4p+2,
   };
   for (double v : INPUTS) {
     double x = FPBits(v).get_val();
diff --git a/libc/test/src/math/cospif16_test.cpp b/libc/test/src/math/cospif16_test.cpp
new file mode 100644
index 000000000000000..6a32498b0570ac6
--- /dev/null
+++ b/libc/test/src/math/cospif16_test.cpp
@@ -0,0 +1,40 @@
+//===-- Exhaustive test for cospif16 --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/cospif16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+using LlvmLibcCospif16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+// Range: [0, Inf]; raw 16-bit patterns from +0 (0x0000) to +Inf (0x7c00).
+static constexpr uint16_t POS_START = 0x0000U;
+static constexpr uint16_t POS_STOP = 0x7c00U;
+
+// Range: [-Inf, 0]; patterns ascend from -0 (0x8000) to -Inf (0xfc00).
+static constexpr uint16_t NEG_START = 0x8000U;
+static constexpr uint16_t NEG_STOP = 0xfc00U;
+
+TEST_F(LlvmLibcCospif16Test, PositiveRange) {
+  for (uint16_t v = POS_START; v <= POS_STOP; ++v) { // POS_STOP is included
+    float16 x = FPBits(v).get_val(); // v is used as the bit pattern of x
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cospi, x,
+                                   LIBC_NAMESPACE::cospif16(x), 0.5);
+  }
+}
+
+TEST_F(LlvmLibcCospif16Test, NegativeRange) {
+  for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) { // NEG_STOP is included
+    float16 x = FPBits(v).get_val(); // v is used as the bit pattern of x
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cospi, x,
+                                   LIBC_NAMESPACE::cospif16(x), 0.5);
+  }
+}
diff --git a/libc/test/src/math/exhaustive/sinpif_test.cpp b/libc/test/src/math/exhaustive/sinpif_test.cpp
index 8bc1d81eb7e3d26..81abac0b73f27a0 100644
--- a/libc/test/src/math/exhaustive/sinpif_test.cpp
+++ b/libc/test/src/math/exhaustive/sinpif_test.cpp
@@ -7,10 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "exhaustive_test.h"
-#include "mpfr.h"
 #include "src/math/sinpif.h"
 #include "utils/MPFRWrapper/MPFRUtils.h"
-#include <sys/types.h>
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
index b2d1871541efc99..269e92c59006281 100644
--- a/libc/test/src/math/smoke/CMakeLists.txt
+++ b/libc/test/src/math/smoke/CMakeLists.txt
@@ -25,6 +25,17 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fp_bits
 )
 
+add_fp_unittest(
+  cospif16_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    cospif16_test.cpp
+  DEPENDS
+    libc.src.errno.errno # cospif16_test.cpp sets and checks libc_errno
+    libc.src.math.cospif16
+)
+
 add_fp_unittest(
   sinf_test
   SUITE
diff --git a/libc/test/src/math/smoke/HypotTest.h b/libc/test/src/math/smoke/HypotTest.h
index d7c62dcbeb0edb1..30d57a4fe2a2672 100644
--- a/libc/test/src/math/smoke/HypotTest.h
+++ b/libc/test/src/math/smoke/HypotTest.h
@@ -14,13 +14,11 @@
 #include "test/UnitTest/Test.h"
 
 template <typename T>
-class HypotTestTemplate : public LIBC_NAMESPACE::testing::Test {
-private:
+struct HypotTestTemplate : public LIBC_NAMESPACE::testing::Test {
   using Func = T (*)(T, T);
 
   DECLARE_SPECIAL_CONSTANTS(T)
 
-public:
   void test_special_numbers(Func func) {
     constexpr int N = 4;
     // Pythagorean triples.
diff --git a/libc/test/src/math/smoke/acosf_test.cpp b/libc/test/src/math/smoke/acosf_test.cpp
index 039d8c2013830d1..e5d56c70f27221d 100644
--- a/libc/test/src/math/smoke/acosf_test.cpp
+++ b/libc/test/src/math/smoke/acosf_test.cpp
@@ -38,3 +38,27 @@ TEST_F(LlvmLibcAcosfTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acosf(-2.0f));
   EXPECT_MATH_ERRNO(EDOM);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcAcosfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(0x1.921fb6p0f, LIBC_NAMESPACE::acosf(min_denormal)); // ~pi/2
+}
+
+TEST_F(LlvmLibcAcosfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(0x1.921fb6p0f, LIBC_NAMESPACE::acosf(min_denormal)); // ~pi/2
+}
+
+TEST_F(LlvmLibcAcosfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(0x1.921fb6p0f, LIBC_NAMESPACE::acosf(min_denormal)); // ~pi/2
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/acoshf_test.cpp b/libc/test/src/math/smoke/acoshf_test.cpp
index 91d433df80558d5..c4e88259919c3cc 100644
--- a/libc/test/src/math/smoke/acoshf_test.cpp
+++ b/libc/test/src/math/smoke/acoshf_test.cpp
@@ -35,3 +35,27 @@ TEST_F(LlvmLibcAcoshfTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::acoshf(neg_inf));
   EXPECT_MATH_ERRNO(EDOM);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcAcoshfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_IS_NAN(LIBC_NAMESPACE::acoshf(min_denormal));
+}
+
+TEST_F(LlvmLibcAcoshfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_IS_NAN(LIBC_NAMESPACE::acoshf(min_denormal));
+}
+
+TEST_F(LlvmLibcAcoshfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_IS_NAN(LIBC_NAMESPACE::acoshf(min_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/asinf_test.cpp b/libc/test/src/math/smoke/asinf_test.cpp
index 450255ccd3020dc..ce1576e2b57dfca 100644
--- a/libc/test/src/math/smoke/asinf_test.cpp
+++ b/libc/test/src/math/smoke/asinf_test.cpp
@@ -41,3 +41,27 @@ TEST_F(LlvmLibcAsinfTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::asinf(-2.0f));
   EXPECT_MATH_ERRNO(EDOM);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcAsinfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::asinf(min_denormal));
+}
+
+TEST_F(LlvmLibcAsinfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::asinf(min_denormal));
+}
+
+TEST_F(LlvmLibcAsinfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::asinf(min_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/asinhf_test.cpp b/libc/test/src/math/smoke/asinhf_test.cpp
index a8e54f379a1fd01..5b83ce6466113f9 100644
--- a/libc/test/src/math/smoke/asinhf_test.cpp
+++ b/libc/test/src/math/smoke/asinhf_test.cpp
@@ -35,3 +35,27 @@ TEST_F(LlvmLibcAsinhfTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(neg_inf, LIBC_NAMESPACE::asinhf(neg_inf));
   EXPECT_MATH_ERRNO(0);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcAsinhfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::asinhf(min_denormal));
+}
+
+TEST_F(LlvmLibcAsinhfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::asinhf(min_denormal));
+}
+
+TEST_F(LlvmLibcAsinhfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::asinhf(min_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/atan2_test.cpp b/libc/test/src/math/smoke/atan2_test.cpp
index 61dd6cab1049fe2..1606c3f378cb88c 100644
--- a/libc/test/src/math/smoke/atan2_test.cpp
+++ b/libc/test/src/math/smoke/atan2_test.cpp
@@ -20,3 +20,40 @@ TEST_F(LlvmLibcAtan2Test, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(0.0, LIBC_NAMESPACE::atan2(1.0, inf));
   EXPECT_FP_EQ_ALL_ROUNDING(-0.0, LIBC_NAMESPACE::atan2(-1.0, inf));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcAtan2Test, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(0x1.921fb54442d18p-1, // ~pi/4
+               LIBC_NAMESPACE::atan2(min_denormal, min_denormal));
+  EXPECT_FP_EQ(0x1.0000000000001p-52,
+               LIBC_NAMESPACE::atan2(min_denormal, max_denormal));
+  EXPECT_FP_EQ(0x1.921fb54442d17p0, // ~pi/2
+               LIBC_NAMESPACE::atan2(max_denormal, min_denormal));
+  EXPECT_FP_EQ(0x1.921fb54442d18p-1, // ~pi/4
+               LIBC_NAMESPACE::atan2(max_denormal, max_denormal));
+}
+
+TEST_F(LlvmLibcAtan2Test, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(min_denormal, min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(min_denormal, max_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(max_denormal, min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(max_denormal, max_denormal));
+}
+
+TEST_F(LlvmLibcAtan2Test, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(min_denormal, min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(min_denormal, max_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(max_denormal, min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::atan2(max_denormal, max_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/atanf_test.cpp b/libc/test/src/math/smoke/atanf_test.cpp
index 0fe11d79533810c..346b8e8abd19919 100644
--- a/libc/test/src/math/smoke/atanf_test.cpp
+++ b/libc/test/src/math/smoke/atanf_test.cpp
@@ -42,3 +42,27 @@ TEST_F(LlvmLibcAtanfTest, SpecialNumbers) {
   // EXPECT_FP_EXCEPTION(0);
   EXPECT_MATH_ERRNO(0);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcAtanfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::atanf(min_denormal));
+}
+
+TEST_F(LlvmLibcAtanfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::atanf(min_denormal));
+}
+
+TEST_F(LlvmLibcAtanfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::atanf(min_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/atanhf_test.cpp b/libc/test/src/math/smoke/atanhf_test.cpp
index e22926bd2f03762..8300b47ea9a3151 100644
--- a/libc/test/src/math/smoke/atanhf_test.cpp
+++ b/libc/test/src/math/smoke/atanhf_test.cpp
@@ -76,3 +76,27 @@ TEST_F(LlvmLibcAtanhfTest, SpecialNumbers) {
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::atanhf(neg_inf), FE_INVALID);
   EXPECT_MATH_ERRNO(EDOM);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcAtanhfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::atanhf(min_denormal));
+}
+
+TEST_F(LlvmLibcAtanhfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::atanhf(min_denormal));
+}
+
+TEST_F(LlvmLibcAtanhfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::atanhf(min_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/cbrt_test.cpp b/libc/test/src/math/smoke/cbrt_test.cpp
index 724e0e979decc18..092e6dd1aeed32b 100644
--- a/libc/test/src/math/smoke/cbrt_test.cpp
+++ b/libc/test/src/math/smoke/cbrt_test.cpp
@@ -32,4 +32,33 @@ TEST_F(LlvmLibcCbrtTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(-0x1.0p42, LIBC_NAMESPACE::cbrt(-0x1.0p126));
   EXPECT_FP_EQ_ALL_ROUNDING(0x1.0p341, LIBC_NAMESPACE::cbrt(0x1.0p1023));
   EXPECT_FP_EQ_ALL_ROUNDING(-0x1.0p341, LIBC_NAMESPACE::cbrt(-0x1.0p1023));
+  EXPECT_FP_EQ(-0x1.0p-340, LIBC_NAMESPACE::cbrt(-0x1.fffffffffffffp-1021));
+  EXPECT_FP_EQ(2.0, LIBC_NAMESPACE::cbrt(0x1.fffffffffffffp2));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcCbrtTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(0x1.0p-358, LIBC_NAMESPACE::cbrt(min_denormal));
+  EXPECT_FP_EQ(0x1.428a2f98d728ap-341, LIBC_NAMESPACE::cbrt(max_denormal));
+}
+
+TEST_F(LlvmLibcCbrtTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::cbrt(min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::cbrt(max_denormal));
+}
+
+TEST_F(LlvmLibcCbrtTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::cbrt(min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::cbrt(max_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/cbrtf_test.cpp b/libc/test/src/math/smoke/cbrtf_test.cpp
index a68e57744bd0e78..202a5ce0733585d 100644
--- a/libc/test/src/math/smoke/cbrtf_test.cpp
+++ b/libc/test/src/math/smoke/cbrtf_test.cpp
@@ -31,3 +31,30 @@ TEST_F(LlvmLibcCbrtfTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(0x1.0p42f, LIBC_NAMESPACE::cbrtf(0x1.0p126f));
   EXPECT_FP_EQ_ALL_ROUNDING(-0x1.0p42f, LIBC_NAMESPACE::cbrtf(-0x1.0p126f));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcCbrtfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(0x1.428a3p-50f, LIBC_NAMESPACE::cbrtf(min_denormal));
+  EXPECT_FP_EQ(0x1.fffffep-43f, LIBC_NAMESPACE::cbrtf(max_denormal));
+}
+
+TEST_F(LlvmLibcCbrtfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::cbrtf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::cbrtf(max_denormal));
+}
+
+TEST_F(LlvmLibcCbrtfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::cbrtf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::cbrtf(max_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/cos_test.cpp b/libc/test/src/math/smoke/cos_test.cpp
index 81c8612dba26e5c..88d8ead1af99221 100644
--- a/libc/test/src/math/smoke/cos_test.cpp
+++ b/libc/test/src/math/smoke/cos_test.cpp
@@ -24,3 +24,30 @@ TEST_F(LlvmLibcCosTest, SpecialNumbers) {
   EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(min_normal));
   EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(min_denormal));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcCosTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(min_denormal));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(max_denormal));
+}
+
+TEST_F(LlvmLibcCosTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(min_denormal));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(max_denormal));
+}
+
+TEST_F(LlvmLibcCosTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(min_denormal));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::cos(max_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/cosf_test.cpp b/libc/test/src/math/smoke/cosf_test.cpp
index 62f7ede9cf17810..2e261f9fac3c0cf 100644
--- a/libc/test/src/math/smoke/cosf_test.cpp
+++ b/libc/test/src/math/smoke/cosf_test.cpp
@@ -35,3 +35,30 @@ TEST_F(LlvmLibcCosfTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::cosf(neg_inf));
   EXPECT_MATH_ERRNO(EDOM);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcCosfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cosf(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cosf(max_denormal));
+}
+
+TEST_F(LlvmLibcCosfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cosf(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cosf(max_denormal));
+}
+
+TEST_F(LlvmLibcCosfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cosf(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cosf(max_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/coshf_test.cpp b/libc/test/src/math/smoke/coshf_test.cpp
index ddaa19f4c392f7d..fd1556b10116d9a 100644
--- a/libc/test/src/math/smoke/coshf_test.cpp
+++ b/libc/test/src/math/smoke/coshf_test.cpp
@@ -51,3 +51,30 @@ TEST_F(LlvmLibcCoshfTest, Overflow) {
       inf, LIBC_NAMESPACE::coshf(FPBits(0x42d00008U).get_val()), FE_OVERFLOW);
   EXPECT_MATH_ERRNO(ERANGE);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcCoshfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::coshf(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::coshf(max_denormal));
+}
+
+TEST_F(LlvmLibcCoshfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::coshf(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::coshf(max_denormal));
+}
+
+TEST_F(LlvmLibcCoshfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::coshf(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::coshf(max_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/cospif16_test.cpp b/libc/test/src/math/smoke/cospif16_test.cpp
new file mode 100644
index 000000000000000..f6d7483393191fd
--- /dev/null
+++ b/libc/test/src/math/smoke/cospif16_test.cpp
@@ -0,0 +1,44 @@
+//===-- Unittests for cospif16 --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/errno/libc_errno.h"
+#include "src/math/cospif16.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+
+using LlvmLibcCospif16Test = LIBC_NAMESPACE::testing::FPTest<float16>;
+
+TEST_F(LlvmLibcCospif16Test, SpecialNumbers) {
+  LIBC_NAMESPACE::libc_errno = 0; // start clean so the errno checks are valid
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::cospif16(aNaN));
+  EXPECT_MATH_ERRNO(0); // NaN input: errno expected to stay 0
+
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif16(zero));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif16(neg_zero));
+  EXPECT_MATH_ERRNO(0);
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::cospif16(inf));
+  EXPECT_MATH_ERRNO(EDOM); // +Inf input: NaN result, EDOM expected
+
+  EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::cospif16(neg_inf));
+  EXPECT_MATH_ERRNO(EDOM); // -Inf input: NaN result, EDOM expected
+}
+
+TEST_F(LlvmLibcCospif16Test, Integers) { // integer and half-integer inputs
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif16(-0x420)); // -1056 (even)
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif16(-0x1.4p+14)); // -20480 (even)
+  EXPECT_FP_EQ(-1.0f, LIBC_NAMESPACE::cospif16(0x421)); // 1057 (odd)
+  EXPECT_FP_EQ(-1.0f, LIBC_NAMESPACE::cospif16(0x333)); // 819 (odd)
+  EXPECT_FP_EQ(zero, LIBC_NAMESPACE::cospif16(-0x1.28p4)); // -18.5
+  EXPECT_FP_EQ(zero, LIBC_NAMESPACE::cospif16(-0x1.ffcp9)); // -1023.5
+  EXPECT_FP_EQ(zero, LIBC_NAMESPACE::cospif16(0x1.01p7)); // 128.5
+  EXPECT_FP_EQ(zero, LIBC_NAMESPACE::cospif16(0x1.f6cp9)); // 1005.5
+}
diff --git a/libc/test/src/math/smoke/cospif_test.cpp b/libc/test/src/math/smoke/cospif_test.cpp
index 007c4c45e3b1570..bf6d86bcfe623af 100644
--- a/libc/test/src/math/smoke/cospif_test.cpp
+++ b/libc/test/src/math/smoke/cospif_test.cpp
@@ -32,3 +32,30 @@ TEST_F(LlvmLibcCospifTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::cospif(neg_inf));
   EXPECT_MATH_ERRNO(EDOM);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcCospifTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif(max_denormal));
+}
+
+TEST_F(LlvmLibcCospifTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif(max_denormal));
+}
+
+TEST_F(LlvmLibcCospifTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::cospif(max_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/erff_test.cpp b/libc/test/src/math/smoke/erff_test.cpp
index 8a970f3a4b7ed19..7d2c1013752c7c0 100644
--- a/libc/test/src/math/smoke/erff_test.cpp
+++ b/libc/test/src/math/smoke/erff_test.cpp
@@ -23,3 +23,30 @@ TEST_F(LlvmLibcErffTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::erff(zero));
   EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, LIBC_NAMESPACE::erff(neg_zero));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcErffTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::erff(min_denormal));
+  EXPECT_FP_EQ(0x1.20dd72p-126f, LIBC_NAMESPACE::erff(max_denormal));
+}
+
+TEST_F(LlvmLibcErffTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::erff(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::erff(max_denormal));
+}
+
+TEST_F(LlvmLibcErffTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::erff(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::erff(max_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/exp10_test.cpp b/libc/test/src/math/smoke/exp10_test.cpp
index 282ddc987b49933..ca9fc359edeb5a4 100644
--- a/libc/test/src/math/smoke/exp10_test.cpp
+++ b/libc/test/src/math/smoke/exp10_test.cpp
@@ -32,3 +32,30 @@ TEST_F(LlvmLibcExp10Test, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(100.0, LIBC_NAMESPACE::exp10(2.0));
   EXPECT_FP_EQ_ALL_ROUNDING(1000.0, LIBC_NAMESPACE::exp10(3.0));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ // FTZ/DAZ tests; compiled only if this is defined
+
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcExp10Test, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp10(min_denormal));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp10(max_denormal));
+}
+
+TEST_F(LlvmLibcExp10Test, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp10(min_denormal));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp10(max_denormal));
+}
+
+TEST_F(LlvmLibcExp10Test, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp10(min_denormal));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp10(max_denormal));
+}
+
+#endif // LIBC_TEST_FTZ_DAZ
diff --git a/libc/test/src/math/smoke/exp10f_test.cpp b/libc/test/src/math/smoke/exp10f_test.cpp
index 9fb15ae75348bb8..bcbfc96efd72689 100644
--- a/libc/test/src/math/smoke/exp10f_test.cpp
+++ b/libc/test/src/math/smoke/exp10f_test.cpp
@@ -54,3 +54,30 @@ TEST_F(LlvmLibcExp10fTest, Overflow) {
       inf, LIBC_NAMESPACE::exp10f(FPBits(0x43000001U).get_val()), FE_OVERFLOW);
   EXPECT_MATH_ERRNO(ERANGE);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcExp10fTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp10f(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp10f(max_denormal));
+}
+
+TEST_F(LlvmLibcExp10fTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp10f(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp10f(max_denormal));
+}
+
+TEST_F(LlvmLibcExp10fTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp10f(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp10f(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/exp2_test.cpp b/libc/test/src/math/smoke/exp2_test.cpp
index d148d27fad38dc5..d97a384367a09f9 100644
--- a/libc/test/src/math/smoke/exp2_test.cpp
+++ b/libc/test/src/math/smoke/exp2_test.cpp
@@ -31,3 +31,30 @@ TEST_F(LlvmLibcExp2Test, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(4.0, LIBC_NAMESPACE::exp2(2.0));
   EXPECT_FP_EQ_ALL_ROUNDING(0.25, LIBC_NAMESPACE::exp2(-2.0));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcExp2Test, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp2(min_denormal));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp2(max_denormal));
+}
+
+TEST_F(LlvmLibcExp2Test, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp2(min_denormal));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp2(max_denormal));
+}
+
+TEST_F(LlvmLibcExp2Test, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp2(min_denormal));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp2(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/exp2f_test.cpp b/libc/test/src/math/smoke/exp2f_test.cpp
index 39228eb2f6d8ba2..d9cdecbf0fe9ba4 100644
--- a/libc/test/src/math/smoke/exp2f_test.cpp
+++ b/libc/test/src/math/smoke/exp2f_test.cpp
@@ -55,3 +55,30 @@ TEST_F(LlvmLibcExp2fTest, Overflow) {
       inf, LIBC_NAMESPACE::exp2f(FPBits(0x43000001U).get_val()), FE_OVERFLOW);
   EXPECT_MATH_ERRNO(ERANGE);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcExp2fTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp2f(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp2f(max_denormal));
+}
+
+TEST_F(LlvmLibcExp2fTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp2f(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp2f(max_denormal));
+}
+
+TEST_F(LlvmLibcExp2fTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp2f(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::exp2f(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/exp2m1f_test.cpp b/libc/test/src/math/smoke/exp2m1f_test.cpp
index 2df435385247289..4657d088f07a893 100644
--- a/libc/test/src/math/smoke/exp2m1f_test.cpp
+++ b/libc/test/src/math/smoke/exp2m1f_test.cpp
@@ -61,3 +61,30 @@ TEST_F(LlvmLibcExp2m1fTest, Underflow) {
                               FE_UNDERFLOW);
   EXPECT_MATH_ERRNO(ERANGE);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcExp2m1fTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::exp2m1f(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::exp2m1f(max_denormal));
+}
+
+TEST_F(LlvmLibcExp2m1fTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::exp2m1f(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::exp2m1f(max_denormal));
+}
+
+TEST_F(LlvmLibcExp2m1fTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::exp2m1f(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::exp2m1f(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/exp_test.cpp b/libc/test/src/math/smoke/exp_test.cpp
index 5fe6f3e92f4a6a6..d2467ff8838969f 100644
--- a/libc/test/src/math/smoke/exp_test.cpp
+++ b/libc/test/src/math/smoke/exp_test.cpp
@@ -27,3 +27,30 @@ TEST_F(LlvmLibcExpTest, SpecialNumbers) {
   EXPECT_FP_EQ_ALL_ROUNDING(1.0, LIBC_NAMESPACE::exp(0.0));
   EXPECT_FP_EQ_ALL_ROUNDING(1.0, LIBC_NAMESPACE::exp(-0.0));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcExpTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp(min_denormal));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp(max_denormal));
+}
+
+TEST_F(LlvmLibcExpTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp(min_denormal));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp(max_denormal));
+}
+
+TEST_F(LlvmLibcExpTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp(min_denormal));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::exp(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/expf_test.cpp b/libc/test/src/math/smoke/expf_test.cpp
index b954125afd7bba0..11181ed1402c9ee 100644
--- a/libc/test/src/math/smoke/expf_test.cpp
+++ b/libc/test/src/math/smoke/expf_test.cpp
@@ -50,3 +50,30 @@ TEST_F(LlvmLibcExpfTest, Overflow) {
       inf, LIBC_NAMESPACE::expf(FPBits(0x42d00008U).get_val()), FE_OVERFLOW);
   EXPECT_MATH_ERRNO(ERANGE);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcExpfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::expf(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::expf(max_denormal));
+}
+
+TEST_F(LlvmLibcExpfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::expf(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::expf(max_denormal));
+}
+
+TEST_F(LlvmLibcExpfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a result of 1 is expected for both denormal inputs.
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::expf(min_denormal));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::expf(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/expm1_test.cpp b/libc/test/src/math/smoke/expm1_test.cpp
index bafdbda8af03bdb..cebd2d757606b0f 100644
--- a/libc/test/src/math/smoke/expm1_test.cpp
+++ b/libc/test/src/math/smoke/expm1_test.cpp
@@ -33,3 +33,30 @@ TEST_F(LlvmLibcExpm1Test, SpecialNumbers) {
   // log(2^-54)
   EXPECT_FP_EQ(-1.0, LIBC_NAMESPACE::expm1(-0x1.2b708872320e2p5));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcExpm1Test, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::expm1(min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::expm1(max_denormal));
+}
+
+TEST_F(LlvmLibcExpm1Test, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::expm1(min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::expm1(max_denormal));
+}
+
+TEST_F(LlvmLibcExpm1Test, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::expm1(min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::expm1(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/expm1f_test.cpp b/libc/test/src/math/smoke/expm1f_test.cpp
index 03b6e47b7c3bc44..f4138aa05ba7e3a 100644
--- a/libc/test/src/math/smoke/expm1f_test.cpp
+++ b/libc/test/src/math/smoke/expm1f_test.cpp
@@ -50,3 +50,30 @@ TEST_F(LlvmLibcExpm1fTest, Overflow) {
       inf, LIBC_NAMESPACE::expm1f(FPBits(0x42d00008U).get_val()), FE_OVERFLOW);
   EXPECT_MATH_ERRNO(ERANGE);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcExpm1fTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::expm1f(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::expm1f(max_denormal));
+}
+
+TEST_F(LlvmLibcExpm1fTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::expm1f(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::expm1f(max_denormal));
+}
+
+TEST_F(LlvmLibcExpm1fTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::expm1f(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::expm1f(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/hypotf_test.cpp b/libc/test/src/math/smoke/hypotf_test.cpp
index 768e7f75e9d678e..62399489987e7ea 100644
--- a/libc/test/src/math/smoke/hypotf_test.cpp
+++ b/libc/test/src/math/smoke/hypotf_test.cpp
@@ -15,3 +15,37 @@ using LlvmLibcHypotfTest = HypotTestTemplate<float>;
 TEST_F(LlvmLibcHypotfTest, SpecialNumbers) {
   test_special_numbers(&LIBC_NAMESPACE::hypotf);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcHypotfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: just hypotf(max_denormal, max_denormal) is expected nonzero.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(min_denormal, min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(min_denormal, max_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(max_denormal, min_denormal));
+  EXPECT_FP_EQ(0x1.6a09e4p-126f,
+               LIBC_NAMESPACE::hypotf(max_denormal, max_denormal));
+}
+
+TEST_F(LlvmLibcHypotfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for every denormal argument pair.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(min_denormal, min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(min_denormal, max_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(max_denormal, min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(max_denormal, max_denormal));
+}
+
+TEST_F(LlvmLibcHypotfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for every denormal argument pair.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(min_denormal, min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(min_denormal, max_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(max_denormal, min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::hypotf(max_denormal, max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/log10_test.cpp b/libc/test/src/math/smoke/log10_test.cpp
index e03416ae20c8f37..9f159f282aad86c 100644
--- a/libc/test/src/math/smoke/log10_test.cpp
+++ b/libc/test/src/math/smoke/log10_test.cpp
@@ -33,3 +33,29 @@ TEST_F(LlvmLibcLog10Test, SpecialNumbers) {
     EXPECT_FP_EQ_ALL_ROUNDING(static_cast<double>(i), LIBC_NAMESPACE::log10(x));
   }
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcLog10Test, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a finite negative result is expected for min_denormal.
+  EXPECT_FP_EQ(-0x1.434e6420f4374p8, LIBC_NAMESPACE::log10(min_denormal));
+}
+
+TEST_F(LlvmLibcLog10Test, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: negative infinity is expected for min_denormal.
+  EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(),
+               LIBC_NAMESPACE::log10(min_denormal));
+}
+
+TEST_F(LlvmLibcLog10Test, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: negative infinity is expected for min_denormal.
+  EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(),
+               LIBC_NAMESPACE::log10(min_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/log10f_test.cpp b/libc/test/src/math/smoke/log10f_test.cpp
index 2524545e0181236..4e3bf654ca918a9 100644
--- a/libc/test/src/math/smoke/log10f_test.cpp
+++ b/libc/test/src/math/smoke/log10f_test.cpp
@@ -32,3 +32,29 @@ TEST_F(LlvmLibcLog10fTest, SpecialNumbers) {
     EXPECT_FP_EQ_ALL_ROUNDING(static_cast<float>(i), LIBC_NAMESPACE::log10f(x));
   }
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcLog10fTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a finite negative result is expected for min_denormal.
+  EXPECT_FP_EQ(-0x1.66d3e7bd9a403p5f, LIBC_NAMESPACE::log10f(min_denormal));
+}
+
+TEST_F(LlvmLibcLog10fTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: negative infinity is expected for min_denormal.
+  EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(),
+               LIBC_NAMESPACE::log10f(min_denormal));
+}
+
+TEST_F(LlvmLibcLog10fTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: negative infinity is expected for min_denormal.
+  EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(),
+               LIBC_NAMESPACE::log10f(min_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/log1p_test.cpp b/libc/test/src/math/smoke/log1p_test.cpp
index 63237f3259b2151..eba65f56df73964 100644
--- a/libc/test/src/math/smoke/log1p_test.cpp
+++ b/libc/test/src/math/smoke/log1p_test.cpp
@@ -27,3 +27,27 @@ TEST_F(LlvmLibcLog1pTest, SpecialNumbers) {
   EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, LIBC_NAMESPACE::log1p(-1.0),
                               FE_DIVBYZERO);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcLog1pTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a zero result is expected for min_denormal.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal));
+}
+
+TEST_F(LlvmLibcLog1pTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for min_denormal.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal));
+}
+
+TEST_F(LlvmLibcLog1pTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for min_denormal.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::log1p(min_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/log1pf_test.cpp b/libc/test/src/math/smoke/log1pf_test.cpp
index c14d65529469792..1b0a1d589e684b0 100644
--- a/libc/test/src/math/smoke/log1pf_test.cpp
+++ b/libc/test/src/math/smoke/log1pf_test.cpp
@@ -26,3 +26,27 @@ TEST_F(LlvmLibcLog1pfTest, SpecialNumbers) {
   EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, LIBC_NAMESPACE::log1pf(-1.0f),
                               FE_DIVBYZERO);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcLog1pfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a zero result is expected for min_denormal.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::log1pf(min_denormal));
+}
+
+TEST_F(LlvmLibcLog1pfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for min_denormal.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::log1pf(min_denormal));
+}
+
+TEST_F(LlvmLibcLog1pfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for min_denormal.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::log1pf(min_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/log2_test.cpp b/libc/test/src/math/smoke/log2_test.cpp
index 89d8e5651091185..1570d60556df2c3 100644
--- a/libc/test/src/math/smoke/log2_test.cpp
+++ b/libc/test/src/math/smoke/log2_test.cpp
@@ -27,3 +27,29 @@ TEST_F(LlvmLibcLog2Test, SpecialNumbers) {
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log2(-1.0), FE_INVALID);
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log2(1.0));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcLog2Test, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a finite negative result is expected for min_denormal.
+  EXPECT_FP_EQ(-1074.0, LIBC_NAMESPACE::log2(min_denormal));
+}
+
+TEST_F(LlvmLibcLog2Test, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: negative infinity is expected for min_denormal.
+  EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(),
+               LIBC_NAMESPACE::log2(min_denormal));
+}
+
+TEST_F(LlvmLibcLog2Test, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: negative infinity is expected for min_denormal.
+  EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(),
+               LIBC_NAMESPACE::log2(min_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/log2f_test.cpp b/libc/test/src/math/smoke/log2f_test.cpp
index 00bfb7c4abad670..67b2c5b2db13d12 100644
--- a/libc/test/src/math/smoke/log2f_test.cpp
+++ b/libc/test/src/math/smoke/log2f_test.cpp
@@ -28,3 +28,28 @@ TEST_F(LlvmLibcLog2fTest, SpecialNumbers) {
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log2f(-1.0f), FE_INVALID);
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log2f(1.0f));
 }
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcLog2fTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a finite negative result is expected for min_denormal.
+  EXPECT_FP_EQ(-149.0f, LIBC_NAMESPACE::log2f(min_denormal));
+}
+
+TEST_F(LlvmLibcLog2fTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: negative infinity is expected for min_denormal.
+  EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(),
+               LIBC_NAMESPACE::log2f(min_denormal));
+}
+
+TEST_F(LlvmLibcLog2fTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: negative infinity is expected for min_denormal.
+  EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(),
+               LIBC_NAMESPACE::log2f(min_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/log_test.cpp b/libc/test/src/math/smoke/log_test.cpp
index e7897add575fade..20b974d7e167d74 100644
--- a/libc/test/src/math/smoke/log_test.cpp
+++ b/libc/test/src/math/smoke/log_test.cpp
@@ -26,3 +26,29 @@ TEST_F(LlvmLibcLogTest, SpecialNumbers) {
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::log(-1.0), FE_INVALID);
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::log(1.0));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcLogTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a finite negative result is expected for min_denormal.
+  EXPECT_FP_EQ(-0x1.74385446d71c3p9, LIBC_NAMESPACE::log(min_denormal));
+}
+
+TEST_F(LlvmLibcLogTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: negative infinity is expected for min_denormal.
+  EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(),
+               LIBC_NAMESPACE::log(min_denormal));
+}
+
+TEST_F(LlvmLibcLogTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: negative infinity is expected for min_denormal.
+  EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(),
+               LIBC_NAMESPACE::log(min_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/logf_test.cpp b/libc/test/src/math/smoke/logf_test.cpp
index a27206027614525..1a3102ae2b14101 100644
--- a/libc/test/src/math/smoke/logf_test.cpp
+++ b/libc/test/src/math/smoke/logf_test.cpp
@@ -27,3 +27,28 @@ TEST_F(LlvmLibcLogfTest, SpecialNumbers) {
   EXPECT_FP_IS_NAN_WITH_EXCEPTION(LIBC_NAMESPACE::logf(-1.0f), FE_INVALID);
   EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::logf(1.0f));
 }
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcLogfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a finite negative result is expected for min_denormal.
+  EXPECT_FP_EQ(-0x1.9d1d9fccf477p6f, LIBC_NAMESPACE::logf(min_denormal));
+}
+
+TEST_F(LlvmLibcLogfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: negative infinity is expected for min_denormal.
+  EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(),
+               LIBC_NAMESPACE::logf(min_denormal));
+}
+
+TEST_F(LlvmLibcLogfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: negative infinity is expected for min_denormal.
+  EXPECT_FP_EQ(FPBits::inf(Sign::NEG).get_val(),
+               LIBC_NAMESPACE::logf(min_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/pow_test.cpp b/libc/test/src/math/smoke/pow_test.cpp
index 7f0136d783c6ba0..f9db7f102962b93 100644
--- a/libc/test/src/math/smoke/pow_test.cpp
+++ b/libc/test/src/math/smoke/pow_test.cpp
@@ -190,3 +190,30 @@ TEST_F(LlvmLibcPowTest, SpecialNumbers) {
     }
   }
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcPowTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: the negative denormal base gives NaN; the denormal exponent, 1.
+  EXPECT_FP_IS_NAN(LIBC_NAMESPACE::pow(-min_denormal, 0.5));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(2.0, min_denormal));
+}
+
+TEST_F(LlvmLibcPowTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: the negative denormal base gives 0; the denormal exponent, 1.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::pow(-min_denormal, 0.5));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(2.0, min_denormal));
+}
+
+TEST_F(LlvmLibcPowTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: the negative denormal base gives 0; the denormal exponent, 1.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::pow(-min_denormal, 0.5));
+  EXPECT_FP_EQ(1.0, LIBC_NAMESPACE::pow(2.0, min_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/powf_test.cpp b/libc/test/src/math/smoke/powf_test.cpp
index a0f66f2733a1ea9..9cc95ce0baef9fc 100644
--- a/libc/test/src/math/smoke/powf_test.cpp
+++ b/libc/test/src/math/smoke/powf_test.cpp
@@ -194,3 +194,30 @@ TEST_F(LlvmLibcPowfTest, SpecialNumbers) {
   EXPECT_FP_EQ(-0.0f, LIBC_NAMESPACE::powf(-0.015625f, 25.0f));
   EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::powf(-0.015625f, 26.0f));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcPowfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: the negative denormal base gives NaN; the denormal exponent, 1.
+  EXPECT_FP_IS_NAN(LIBC_NAMESPACE::powf(-min_denormal, 0.5f));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::powf(2.0f, min_denormal));
+}
+
+TEST_F(LlvmLibcPowfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: the negative denormal base gives 0; the denormal exponent, 1.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::powf(-min_denormal, 0.5f));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::powf(2.0f, min_denormal));
+}
+
+TEST_F(LlvmLibcPowfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: the negative denormal base gives 0; the denormal exponent, 1.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::powf(-min_denormal, 0.5f));
+  EXPECT_FP_EQ(1.0f, LIBC_NAMESPACE::powf(2.0f, min_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/sin_test.cpp b/libc/test/src/math/smoke/sin_test.cpp
index 16ced68709ca755..7dd1b7fda625b0d 100644
--- a/libc/test/src/math/smoke/sin_test.cpp
+++ b/libc/test/src/math/smoke/sin_test.cpp
@@ -24,3 +24,30 @@ TEST_F(LlvmLibcSinTest, SpecialNumbers) {
   EXPECT_FP_EQ(min_normal, LIBC_NAMESPACE::sin(min_normal));
   EXPECT_FP_EQ(min_denormal, LIBC_NAMESPACE::sin(min_denormal));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcSinTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sin(min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sin(max_denormal));
+}
+
+TEST_F(LlvmLibcSinTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sin(min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sin(max_denormal));
+}
+
+TEST_F(LlvmLibcSinTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sin(min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sin(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/sinf_test.cpp b/libc/test/src/math/smoke/sinf_test.cpp
index 1bf6eaa8b78d7d3..776c66dcb37bdee 100644
--- a/libc/test/src/math/smoke/sinf_test.cpp
+++ b/libc/test/src/math/smoke/sinf_test.cpp
@@ -35,3 +35,30 @@ TEST_F(LlvmLibcSinfTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::sinf(neg_inf));
   EXPECT_MATH_ERRNO(EDOM);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcSinfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinf(max_denormal));
+}
+
+TEST_F(LlvmLibcSinfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinf(max_denormal));
+}
+
+TEST_F(LlvmLibcSinfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinf(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/sinhf_test.cpp b/libc/test/src/math/smoke/sinhf_test.cpp
index 635a10627a21096..3cc0656967581ab 100644
--- a/libc/test/src/math/smoke/sinhf_test.cpp
+++ b/libc/test/src/math/smoke/sinhf_test.cpp
@@ -62,3 +62,30 @@ TEST_F(LlvmLibcSinhfTest, Overflow) {
       inf, LIBC_NAMESPACE::sinhf(FPBits(0x42d00008U).get_val()), FE_OVERFLOW);
   EXPECT_MATH_ERRNO(ERANGE);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcSinhfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinhf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinhf(max_denormal));
+}
+
+TEST_F(LlvmLibcSinhfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinhf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinhf(max_denormal));
+}
+
+TEST_F(LlvmLibcSinhfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinhf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinhf(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/sinpif_test.cpp b/libc/test/src/math/smoke/sinpif_test.cpp
index 0918294ab3611c2..11bda0b6b28cc77 100644
--- a/libc/test/src/math/smoke/sinpif_test.cpp
+++ b/libc/test/src/math/smoke/sinpif_test.cpp
@@ -41,3 +41,30 @@ TEST_F(LlvmLibcSinpifTest, Integers) {
   EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif(0x1.cp+106));
   EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::sinpif(0x1.cp+21));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcSinpifTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: min_denormal gives 0, but max_denormal gives a nonzero result.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif(min_denormal));
+  EXPECT_FP_EQ(0x1.921fb2p-125f, LIBC_NAMESPACE::sinpif(max_denormal));
+}
+
+TEST_F(LlvmLibcSinpifTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif(max_denormal));
+}
+
+TEST_F(LlvmLibcSinpifTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::sinpif(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/tan_test.cpp b/libc/test/src/math/smoke/tan_test.cpp
index 498dba76b6e7195..aa5c23d65886d29 100644
--- a/libc/test/src/math/smoke/tan_test.cpp
+++ b/libc/test/src/math/smoke/tan_test.cpp
@@ -24,3 +24,30 @@ TEST_F(LlvmLibcTanTest, SpecialNumbers) {
   EXPECT_FP_EQ(min_normal, LIBC_NAMESPACE::tan(min_normal));
   EXPECT_FP_EQ(min_denormal, LIBC_NAMESPACE::tan(min_denormal));
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcTanTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::tan(min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::tan(max_denormal));
+}
+
+TEST_F(LlvmLibcTanTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::tan(min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::tan(max_denormal));
+}
+
+TEST_F(LlvmLibcTanTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::tan(min_denormal));
+  EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::tan(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/tanf_test.cpp b/libc/test/src/math/smoke/tanf_test.cpp
index b90c5da8741892a..93fbfded3f66a18 100644
--- a/libc/test/src/math/smoke/tanf_test.cpp
+++ b/libc/test/src/math/smoke/tanf_test.cpp
@@ -35,3 +35,30 @@ TEST_F(LlvmLibcTanfTest, SpecialNumbers) {
   EXPECT_FP_EQ(aNaN, LIBC_NAMESPACE::tanf(neg_inf));
   EXPECT_MATH_ERRNO(EDOM);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcTanfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanf(max_denormal));
+}
+
+TEST_F(LlvmLibcTanfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanf(max_denormal));
+}
+
+TEST_F(LlvmLibcTanfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanf(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/math/smoke/tanhf_test.cpp b/libc/test/src/math/smoke/tanhf_test.cpp
index 748e6fe8c62693d..3b7faa81dac2eac 100644
--- a/libc/test/src/math/smoke/tanhf_test.cpp
+++ b/libc/test/src/math/smoke/tanhf_test.cpp
@@ -35,3 +35,30 @@ TEST_F(LlvmLibcTanhfTest, SpecialNumbers) {
   EXPECT_FP_EQ(-1.0f, LIBC_NAMESPACE::tanhf(neg_inf));
   EXPECT_MATH_ERRNO(0);
 }
+
+#ifdef LIBC_TEST_FTZ_DAZ
+// FTZ/DAZ smoke tests; presumably ModifyMXCSR applies the bits while in scope.
+using namespace LIBC_NAMESPACE::testing;
+
+TEST_F(LlvmLibcTanhfTest, FTZMode) {
+  ModifyMXCSR mxcsr(FTZ);
+  // FTZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanhf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanhf(max_denormal));
+}
+
+TEST_F(LlvmLibcTanhfTest, DAZMode) {
+  ModifyMXCSR mxcsr(DAZ);
+  // DAZ only: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanhf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanhf(max_denormal));
+}
+
+TEST_F(LlvmLibcTanhfTest, FTZDAZMode) {
+  ModifyMXCSR mxcsr(FTZ | DAZ);
+  // FTZ and DAZ: a zero result is expected for both denormal inputs.
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanhf(min_denormal));
+  EXPECT_FP_EQ(0.0f, LIBC_NAMESPACE::tanhf(max_denormal));
+}
+
+#endif
diff --git a/libc/test/src/sys/sendfile/CMakeLists.txt b/libc/test/src/sys/sendfile/CMakeLists.txt
index 82efaa147bd89d1..ceaa4accdd06ef2 100644
--- a/libc/test/src/sys/sendfile/CMakeLists.txt
+++ b/libc/test/src/sys/sendfile/CMakeLists.txt
@@ -9,7 +9,7 @@ add_libc_unittest(
   SRCS
     sendfile_test.cpp
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.src.errno.errno 
     libc.src.fcntl.open
diff --git a/libc/test/src/sys/sendfile/sendfile_test.cpp b/libc/test/src/sys/sendfile/sendfile_test.cpp
index 59025438a24671e..a658212ddb72cdd 100644
--- a/libc/test/src/sys/sendfile/sendfile_test.cpp
+++ b/libc/test/src/sys/sendfile/sendfile_test.cpp
@@ -17,7 +17,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/stat.h>
 
 namespace cpp = LIBC_NAMESPACE::cpp;
diff --git a/libc/test/src/sys/stat/CMakeLists.txt b/libc/test/src/sys/stat/CMakeLists.txt
index 877a129b627dd45..dd3d0932755b769 100644
--- a/libc/test/src/sys/stat/CMakeLists.txt
+++ b/libc/test/src/sys/stat/CMakeLists.txt
@@ -9,7 +9,7 @@ add_libc_unittest(
   SRCS
     chmod_test.cpp
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.src.errno.errno
     libc.src.fcntl.open
@@ -25,7 +25,7 @@ add_libc_unittest(
   SRCS
     fchmodat_test.cpp
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.src.errno.errno
     libc.src.fcntl.open
@@ -41,7 +41,7 @@ add_libc_unittest(
   SRCS
     fchmod_test.cpp
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.src.errno.errno
     libc.src.fcntl.open
@@ -57,7 +57,7 @@ add_libc_unittest(
   SRCS
     mkdirat_test.cpp
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.src.errno.errno
     libc.src.sys.stat.mkdirat
@@ -71,7 +71,7 @@ add_libc_unittest(
   SRCS
     stat_test.cpp
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.src.errno.errno
     libc.src.sys.stat.stat
@@ -87,7 +87,7 @@ add_libc_unittest(
   SRCS
     lstat_test.cpp
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.src.errno.errno
     libc.src.sys.stat.lstat
@@ -103,7 +103,7 @@ add_libc_unittest(
   SRCS
     fstat_test.cpp
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_stat
     libc.src.errno.errno
     libc.src.sys.stat.fstat
diff --git a/libc/test/src/sys/stat/chmod_test.cpp b/libc/test/src/sys/stat/chmod_test.cpp
index c688996615ceef1..83ab0f45b6f08ae 100644
--- a/libc/test/src/sys/stat/chmod_test.cpp
+++ b/libc/test/src/sys/stat/chmod_test.cpp
@@ -14,7 +14,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/stat.h>
 
 TEST(LlvmLibcChmodTest, ChangeAndOpen) {
diff --git a/libc/test/src/sys/stat/fchmod_test.cpp b/libc/test/src/sys/stat/fchmod_test.cpp
index 91c0f68b8708c84..03eb79d95ddd6d3 100644
--- a/libc/test/src/sys/stat/fchmod_test.cpp
+++ b/libc/test/src/sys/stat/fchmod_test.cpp
@@ -14,7 +14,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/stat.h>
 
 TEST(LlvmLibcChmodTest, ChangeAndOpen) {
diff --git a/libc/test/src/sys/stat/fchmodat_test.cpp b/libc/test/src/sys/stat/fchmodat_test.cpp
index c43ef8ae13315a1..09970b6e0fb163d 100644
--- a/libc/test/src/sys/stat/fchmodat_test.cpp
+++ b/libc/test/src/sys/stat/fchmodat_test.cpp
@@ -14,7 +14,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/stat.h>
 
 TEST(LlvmLibcFchmodatTest, ChangeAndOpen) {
diff --git a/libc/test/src/sys/stat/fstat_test.cpp b/libc/test/src/sys/stat/fstat_test.cpp
index 1379eae26a47aed..34c675d1a4e2992 100644
--- a/libc/test/src/sys/stat/fstat_test.cpp
+++ b/libc/test/src/sys/stat/fstat_test.cpp
@@ -14,7 +14,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/stat.h>
 
 TEST(LlvmLibcFStatTest, CreatAndReadMode) {
diff --git a/libc/test/src/sys/stat/lstat_test.cpp b/libc/test/src/sys/stat/lstat_test.cpp
index b44b3d1a59ce7bf..a723d5ae2e297ba 100644
--- a/libc/test/src/sys/stat/lstat_test.cpp
+++ b/libc/test/src/sys/stat/lstat_test.cpp
@@ -14,7 +14,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/stat.h>
 
 TEST(LlvmLibcLStatTest, CreatAndReadMode) {
diff --git a/libc/test/src/sys/stat/mkdirat_test.cpp b/libc/test/src/sys/stat/mkdirat_test.cpp
index cbacc16b402d7ab..85e013de234e76a 100644
--- a/libc/test/src/sys/stat/mkdirat_test.cpp
+++ b/libc/test/src/sys/stat/mkdirat_test.cpp
@@ -11,7 +11,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 
 TEST(LlvmLibcMkdiratTest, CreateAndRemove) {
   using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds;
diff --git a/libc/test/src/sys/stat/stat_test.cpp b/libc/test/src/sys/stat/stat_test.cpp
index baf363382022ad3..0ddd8baaec1c9b7 100644
--- a/libc/test/src/sys/stat/stat_test.cpp
+++ b/libc/test/src/sys/stat/stat_test.cpp
@@ -14,7 +14,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/stat.h>
 
 TEST(LlvmLibcStatTest, CreatAndReadMode) {
diff --git a/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp b/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp
index 8cb5f867453e45e..2f3e0b96ff09574 100644
--- a/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp
+++ b/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp
@@ -1,4 +1,4 @@
-#include "llvm-libc-macros/linux/fcntl-macros.h"
+#include "hdr/fcntl_macros.h"
 #include "src/__support/macros/config.h"
 #include "src/fcntl/open.h"
 #include "src/sys/statvfs/fstatvfs.h"
diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt
index e03e56b3cf8ad71..ce936cebad4260c 100644
--- a/libc/test/src/unistd/CMakeLists.txt
+++ b/libc/test/src/unistd/CMakeLists.txt
@@ -24,11 +24,12 @@ add_libc_unittest(
   SRCS
     chdir_test.cpp
   DEPENDS
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.src.errno.errno
-    libc.src.fcntl.open
     libc.src.unistd.chdir
     libc.src.unistd.close
+    libc.src.fcntl.open
     libc.test.UnitTest.ErrnoSetterMatcher
 )
 
@@ -223,7 +224,7 @@ add_libc_unittest(
   SRCS
     rmdir_test.cpp
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.src.errno.errno
     libc.src.sys.stat.mkdir
     libc.src.unistd.rmdir
@@ -262,7 +263,7 @@ add_libc_unittest(
   SRCS
     readlinkat_test.cpp
   DEPENDS
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.unistd
     libc.src.errno.errno
     libc.src.unistd.readlinkat
@@ -410,7 +411,7 @@ add_libc_unittest(
     syscall_test.cpp
   DEPENDS
     libc.include.unistd
-    libc.include.fcntl
+    libc.hdr.fcntl_macros
     libc.include.sys_syscall
     libc.src.errno.errno 
     libc.src.unistd.__llvm_libc_syscall
diff --git a/libc/test/src/unistd/chdir_test.cpp b/libc/test/src/unistd/chdir_test.cpp
index 51dc7bb15d3ee6d..e1bdcd77119f733 100644
--- a/libc/test/src/unistd/chdir_test.cpp
+++ b/libc/test/src/unistd/chdir_test.cpp
@@ -13,7 +13,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 
 TEST(LlvmLibcChdirTest, ChangeAndOpen) {
   // The idea of this test is that we will first open an existing test file
diff --git a/libc/test/src/unistd/fchdir_test.cpp b/libc/test/src/unistd/fchdir_test.cpp
index ae88e1f22ed6b76..0e39fde17c67bba 100644
--- a/libc/test/src/unistd/fchdir_test.cpp
+++ b/libc/test/src/unistd/fchdir_test.cpp
@@ -13,7 +13,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 
 TEST(LlvmLibcChdirTest, ChangeAndOpen) {
   // The idea of this test is that we will first open an existing test file
diff --git a/libc/test/src/unistd/readlinkat_test.cpp b/libc/test/src/unistd/readlinkat_test.cpp
index 1fa683b02b5b5e6..9e4bb9af02e76a9 100644
--- a/libc/test/src/unistd/readlinkat_test.cpp
+++ b/libc/test/src/unistd/readlinkat_test.cpp
@@ -15,7 +15,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 
 namespace cpp = LIBC_NAMESPACE::cpp;
 
diff --git a/libc/test/src/unistd/rmdir_test.cpp b/libc/test/src/unistd/rmdir_test.cpp
index 93cb0f3f53c1b0b..4f4cd94c5cf0b73 100644
--- a/libc/test/src/unistd/rmdir_test.cpp
+++ b/libc/test/src/unistd/rmdir_test.cpp
@@ -12,7 +12,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 
 TEST(LlvmLibcRmdirTest, CreateAndRemove) {
   using LIBC_NAMESPACE::testing::ErrnoSetterMatcher::Succeeds;
diff --git a/libc/test/src/unistd/syscall_test.cpp b/libc/test/src/unistd/syscall_test.cpp
index cee29bd9afa308b..f6cc3eab9aabe87 100644
--- a/libc/test/src/unistd/syscall_test.cpp
+++ b/libc/test/src/unistd/syscall_test.cpp
@@ -11,7 +11,7 @@
 #include "test/UnitTest/ErrnoSetterMatcher.h"
 #include "test/UnitTest/Test.h"
 
-#include <fcntl.h>
+#include "hdr/fcntl_macros.h"
 #include <sys/stat.h>    // For S_* flags.
 #include <sys/syscall.h> // For syscall numbers.
 #include <unistd.h>
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index bd4fbe294a622d3..60e4abadb5e3c8a 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -255,19 +255,18 @@ class MPFRNumber {
     mpfr_cospi(result.value, value, mpfr_rounding);
     return result;
 #else
-    MPFRNumber value_frac(*this);
-    mpfr_frac(value_frac.value, value, MPFR_RNDN);
-
-    if (mpfr_cmp_si(value_frac.value, 0.0) == 0) {
-      mpz_t integer_part;
-      mpz_init(integer_part);
-      mpfr_get_z(integer_part, value, MPFR_RNDN);
-
-      if (mpz_tstbit(integer_part, 0)) {
-        mpfr_set_si(result.value, -1.0, MPFR_RNDN); // odd
-      } else {
-        mpfr_set_si(result.value, 1.0, MPFR_RNDN); // even
-      }
+    if (mpfr_integer_p(value)) {
+      // For an integer input the result is exactly -1 (odd) or 1 (even), so
+      // only the lowest bit of the integer value is needed.
+      mpz_t integer;
+      mpz_init(integer);
+      mpfr_get_z(integer, value, mpfr_rounding);
+
+      int d = mpz_tstbit(integer, 0);
+      mpfr_set_si(result.value, d ? -1 : 1, mpfr_rounding);
+      // Free the temporary GMP integer; it would otherwise leak on this
+      // early return.
+      mpz_clear(integer);
       return result;
     }
 
diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 3d7c3591a556e56..16d74e53295cc12 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -29,7 +29,13 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
   ptx-nvidiacl/lib/SOURCES;
   r600/lib/SOURCES;
   spirv/lib/SOURCES;
-  spirv64/lib/SOURCES
+  spirv64/lib/SOURCES;
+  # CLC internal libraries
+  clc/lib/generic/SOURCES;
+  clc/lib/clspv/SOURCES;
+  clc/lib/clspv64/SOURCES;
+  clc/lib/spirv/SOURCES;
+  clc/lib/spirv64/SOURCES;
 )
 
 set( LIBCLC_MIN_LLVM 3.9.0 )
@@ -278,49 +284,30 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
     set( DARCH ${ARCH} )
   endif()
 
-  # Enumerate SOURCES* files
-  set( source_list )
-  foreach( l ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS} )
-    foreach( s "SOURCES" "SOURCES_${LLVM_MAJOR}.${LLVM_MINOR}" )
-      file( TO_CMAKE_PATH ${l}/lib/${s} file_loc )
-      file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${file_loc} loc )
-      # Prepend the location to give higher priority to
-      # specialized implementation
-      if( EXISTS ${loc} )
-        set( source_list ${file_loc} ${source_list} )
-      endif()
-    endforeach()
-  endforeach()
-
-  # Add the generated convert.cl here to prevent adding the one listed in
-  # SOURCES
-  set( objects )   # A "set" of already-added input files
-  set( rel_files ) # Source directory input files, relative to the root dir
-  set( gen_files ) # Generated binary input files, relative to the binary dir
-  if( NOT ${ARCH} STREQUAL "spirv" AND NOT ${ARCH} STREQUAL "spirv64" )
-    if( NOT ENABLE_RUNTIME_SUBNORMAL AND NOT ${ARCH} STREQUAL "clspv" AND
-        NOT ${ARCH} STREQUAL "clspv64" )
-      list( APPEND gen_files convert.cl )
-      list( APPEND objects convert.cl )
-      list( APPEND rel_files generic/lib/subnormal_use_default.ll )
-    elseif(${ARCH} STREQUAL "clspv" OR ${ARCH} STREQUAL "clspv64")
-      list( APPEND gen_files clspv-convert.cl )
-      list( APPEND objects clspv-convert.cl )
+  set( clc_lib_files )
+  libclc_configure_lib_source(
+    clc_lib_files
+    CLC_INTERNAL
+    LIB_ROOT_DIR clc
+    DIRS ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS}
+  )
+
+  set( opencl_lib_files )
+  set( opencl_gen_files )
+
+  if( NOT ARCH STREQUAL spirv AND NOT ARCH STREQUAL spirv64 )
+    if( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 )
+      list( APPEND opencl_gen_files clspv-convert.cl )
+    elseif ( NOT ENABLE_RUNTIME_SUBNORMAL )
+      list( APPEND opencl_gen_files convert.cl )
+      list( APPEND opencl_lib_files generic/lib/subnormal_use_default.ll )
     endif()
   endif()
 
-  foreach( l ${source_list} )
-    file( READ ${l} file_list )
-    string( REPLACE "\n" ";" file_list ${file_list} )
-    get_filename_component( dir ${l} DIRECTORY )
-    foreach( f ${file_list} )
-      # Only add each file once, so that targets can 'specialize' builtins
-      if( NOT ${f} IN_LIST objects )
-        list( APPEND objects ${f} )
-        list( APPEND rel_files ${dir}/${f} )
-      endif()
-    endforeach()
-  endforeach()
+  libclc_configure_lib_source(
+    opencl_lib_files
+    DIRS ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS}
+  )
 
   foreach( d ${${t}_devices} )
     get_libclc_device_info(
@@ -331,11 +318,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
       CLANG_TRIPLE clang_triple
     )
 
-    set( mcpu )
-    if( NOT "${cpu}" STREQUAL "" )
-      set( mcpu "-mcpu=${cpu}" )
-    endif()
-
     message( STATUS "  device: ${d} ( ${${d}_aliases} )" )
 
     if ( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 )
@@ -358,114 +340,41 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
     list( APPEND build_flags
       -D__CLC_INTERNAL
       -D${CLC_TARGET_DEFINE}
-      -I${CMAKE_CURRENT_SOURCE_DIR}/generic/include
+      # All libclc builtin libraries see CLC headers
+      -I${CMAKE_CURRENT_SOURCE_DIR}/clc/include
       # FIXME: Fix libclc to not require disabling this noisy warning
       -Wno-bitwise-conditional-parentheses
     )
 
-    set( bytecode_files "" )
-    foreach( file IN LISTS gen_files rel_files )
-      # We need to take each file and produce an absolute input file, as well
-      # as a unique architecture-specific output file. We deal with a mix of
-      # different input files, which makes this trickier.
-      if( ${file} IN_LIST gen_files )
-        # Generated files are given just as file names, which we must make
-        # absolute to the binary directory.
-        set( input_file ${CMAKE_CURRENT_BINARY_DIR}/${file} )
-        set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${file}.bc" )
-      else()
-        # Other files are originally relative to each SOURCE file, which are
-        # then make relative to the libclc root directory. We must normalize
-        # the path (e.g., ironing out any ".."), then make it relative to the
-        # root directory again, and use that relative path component for the
-        # binary path.
-        get_filename_component( abs_path ${file} ABSOLUTE BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
-        file( RELATIVE_PATH root_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${abs_path} )
-        set( input_file ${CMAKE_CURRENT_SOURCE_DIR}/${file} )
-        set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${root_rel_path}.bc" )
-      endif()
-
-      get_filename_component( file_dir ${file} DIRECTORY )
-
-      compile_to_bc(
-        TRIPLE ${clang_triple}
-        INPUT ${input_file}
-        OUTPUT ${output_file}
-        EXTRA_OPTS "${mcpu}" -fno-builtin -nostdlib
-                   "${build_flags}" -I${CMAKE_CURRENT_SOURCE_DIR}/${file_dir}
-        DEPENDENCIES generate_convert.cl clspv-generate_convert.cl
-      )
-      list( APPEND bytecode_files ${output_file} )
-    endforeach()
+    if( NOT "${cpu}" STREQUAL "" )
+      list( APPEND build_flags -mcpu=${cpu} )
+    endif()
 
-    set( builtins_comp_lib_tgt builtins.comp.${arch_suffix} )
-    add_custom_target( ${builtins_comp_lib_tgt}
-      DEPENDS ${bytecode_files}
+    add_libclc_builtin_set(
+      CLC_INTERNAL
+      ARCH ${ARCH}
+      ARCH_SUFFIX clc-${arch_suffix}
+      TRIPLE ${clang_triple}
+      COMPILE_FLAGS ${build_flags}
+      OPT_FLAGS ${opt_flags}
+      LIB_FILES ${clc_lib_files}
     )
-    set_target_properties( ${builtins_comp_lib_tgt} PROPERTIES FOLDER "libclc/Device IR/Comp" )
 
-    set( builtins_link_lib_tgt builtins.link.${arch_suffix} )
-    link_bc(
-      TARGET ${builtins_link_lib_tgt}
-      INPUTS ${bytecode_files}
-      DEPENDENCIES ${builtins_comp_lib_tgt}
+    list( APPEND build_flags
+      -I${CMAKE_CURRENT_SOURCE_DIR}/generic/include
     )
 
-    set( builtins_link_lib $<TARGET_PROPERTY:${builtins_link_lib_tgt},TARGET_FILE> )
-
-    if( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 )
-      set( spv_suffix ${arch_suffix}.spv )
-      add_custom_command( OUTPUT ${spv_suffix}
-        COMMAND ${llvm-spirv_exe} ${spvflags} -o ${spv_suffix} ${builtins_link_lib}
-        DEPENDS ${llvm-spirv_target} ${builtins_link_lib} ${builtins_link_lib_tgt}
-      )
-      add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" )
-      set_target_properties( "prepare-${spv_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" )
-      install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix}
-         DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
-    else()
-      set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} )
-
-      # Add opt target
-      add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc
-        COMMAND ${opt_exe} ${opt_flags} -o ${builtins_opt_lib_tgt}.bc
-          ${builtins_link_lib}
-        DEPENDS ${opt_target} ${builtins_link_lib} ${builtins_link_lib_tgt}
-      )
-      add_custom_target( ${builtins_opt_lib_tgt}
-        ALL DEPENDS ${builtins_opt_lib_tgt}.bc
-      )
-      set_target_properties( ${builtins_opt_lib_tgt} PROPERTIES
-        TARGET_FILE ${CMAKE_CURRENT_BINARY_DIR}/${builtins_opt_lib_tgt}.bc
-        FOLDER "libclc/Device IR/Opt"
-      )
-
-      set( builtins_opt_lib $<TARGET_PROPERTY:${builtins_opt_lib_tgt},TARGET_FILE> )
-
-      # Add prepare target
-      set( obj_suffix ${arch_suffix}.bc )
-      add_custom_command( OUTPUT ${obj_suffix}
-        COMMAND ${prepare_builtins_exe} -o ${obj_suffix} ${builtins_opt_lib}
-        DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} )
-      add_custom_target( prepare-${obj_suffix} ALL DEPENDS ${obj_suffix} )
-      set_target_properties( "prepare-${obj_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" )
-
-      # nvptx-- targets don't include workitem builtins
-      if( NOT clang_triple MATCHES ".*ptx.*--$" )
-        add_test( NAME external-calls-${obj_suffix}
-          COMMAND ./check_external_calls.sh ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} ${LLVM_TOOLS_BINARY_DIR}
-          WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} )
-      endif()
-
-      install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
-      foreach( a ${${d}_aliases} )
-        set( alias_suffix "${a}-${clang_triple}.bc" )
-        add_custom_target( ${alias_suffix} ALL
-          COMMAND ${CMAKE_COMMAND} -E create_symlink ${obj_suffix} ${alias_suffix}
-          DEPENDS prepare-${obj_suffix} )
-        set_target_properties( "${alias_suffix}" PROPERTIES FOLDER "libclc/Device IR/Aliases" )
-        install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${alias_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
-      endforeach( a )
-    endif()
+    add_libclc_builtin_set(
+      ARCH ${ARCH}
+      ARCH_SUFFIX ${arch_suffix}
+      TRIPLE ${clang_triple}
+      COMPILE_FLAGS ${build_flags}
+      OPT_FLAGS ${opt_flags}
+      LIB_FILES ${opencl_lib_files}
+      GEN_FILES ${opencl_gen_files}
+      ALIASES ${${d}_aliases}
+      # Link in the CLC builtins and internalize their symbols
+      INTERNAL_LINK_DEPENDENCIES $<TARGET_PROPERTY:builtins.link.clc-${arch_suffix},TARGET_FILE>
+    )
   endforeach( d )
 endforeach( t )
diff --git a/libclc/generic/include/clc/clcfunc.h b/libclc/clc/include/clc/clcfunc.h
similarity index 85%
rename from libclc/generic/include/clc/clcfunc.h
rename to libclc/clc/include/clc/clcfunc.h
index 086d780b970859d..fe3406f64fecb8d 100644
--- a/libclc/generic/include/clc/clcfunc.h
+++ b/libclc/clc/include/clc/clcfunc.h
@@ -1,3 +1,6 @@
+#ifndef __CLC_CLCFUNC_H_
+#define __CLC_CLCFUNC_H_
+
 #define _CLC_OVERLOAD __attribute__((overloadable))
 #define _CLC_DECL
 #define _CLC_INLINE __attribute__((always_inline)) inline
@@ -11,3 +14,5 @@
 #else
 #define _CLC_DEF __attribute__((always_inline))
 #endif
+
+#endif // __CLC_CLCFUNC_H_
diff --git a/libclc/generic/include/clc/clctypes.h b/libclc/clc/include/clc/clctypes.h
similarity index 94%
rename from libclc/generic/include/clc/clctypes.h
rename to libclc/clc/include/clc/clctypes.h
index 76b816d395c2888..8ededd967e00339 100644
--- a/libclc/generic/include/clc/clctypes.h
+++ b/libclc/clc/include/clc/clctypes.h
@@ -1,3 +1,6 @@
+#ifndef __CLC_CLCTYPES_H_
+#define __CLC_CLCTYPES_H_
+
 /* 6.1.1 Built-in Scalar Data Types */
 
 typedef unsigned char uchar;
@@ -8,12 +11,12 @@ typedef unsigned long ulong;
 typedef __SIZE_TYPE__ size_t;
 typedef __PTRDIFF_TYPE__ ptrdiff_t;
 
-#define __stdint_join3(a,b,c) a ## b ## c
+#define __stdint_join3(a, b, c) a##b##c
 
-#define  __intn_t(n) __stdint_join3(__INT, n, _TYPE__)
+#define __intn_t(n) __stdint_join3(__INT, n, _TYPE__)
 #define __uintn_t(n) __stdint_join3(unsigned __INT, n, _TYPE__)
 
-typedef  __intn_t(__INTPTR_WIDTH__)  intptr_t;
+typedef __intn_t(__INTPTR_WIDTH__) intptr_t;
 typedef __uintn_t(__INTPTR_WIDTH__) uintptr_t;
 
 #undef __uintn_t
@@ -93,3 +96,5 @@ typedef __attribute__((ext_vector_type(4))) half half4;
 typedef __attribute__((ext_vector_type(8))) half half8;
 typedef __attribute__((ext_vector_type(16))) half half16;
 #endif
+
+#endif // __CLC_CLCTYPES_H_
diff --git a/libclc/clc/include/clc/geometric/clc_dot.h b/libclc/clc/include/clc/geometric/clc_dot.h
new file mode 100644
index 000000000000000..e0e47ab2093efdb
--- /dev/null
+++ b/libclc/clc/include/clc/geometric/clc_dot.h
@@ -0,0 +1,2 @@
+#define __CLC_BODY <clc/geometric/clc_dot.inc>
+#include <clc/geometric/floatn.inc>
diff --git a/libclc/clc/include/clc/geometric/clc_dot.inc b/libclc/clc/include/clc/geometric/clc_dot.inc
new file mode 100644
index 000000000000000..016b564df362d20
--- /dev/null
+++ b/libclc/clc/include/clc/geometric/clc_dot.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_FLOAT __clc_dot(__CLC_FLOATN p0, __CLC_FLOATN p1);
diff --git a/libclc/clc/include/clc/internal/clc.h b/libclc/clc/include/clc/internal/clc.h
new file mode 100644
index 000000000000000..c3bdfd754105f74
--- /dev/null
+++ b/libclc/clc/include/clc/internal/clc.h
@@ -0,0 +1,26 @@
+#ifndef __CLC_INTERNAL_CLC_H_
+#define __CLC_INTERNAL_CLC_H_
+
+#ifndef cl_clang_storage_class_specifiers
+#error Implementation requires cl_clang_storage_class_specifiers extension!
+#endif
+
+#pragma OPENCL EXTENSION cl_clang_storage_class_specifiers : enable
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#ifdef cl_khr_fp16
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#endif
+
+/* Function Attributes */
+#include <clc/clcfunc.h>
+
+/* 6.1 Supported Data Types */
+#include <clc/clctypes.h>
+
+#pragma OPENCL EXTENSION all : disable
+
+#endif // __CLC_INTERNAL_CLC_H_
diff --git a/libclc/clc/lib/clspv/SOURCES b/libclc/clc/lib/clspv/SOURCES
new file mode 100644
index 000000000000000..75a3130357c3456
--- /dev/null
+++ b/libclc/clc/lib/clspv/SOURCES
@@ -0,0 +1 @@
+dummy.cl
diff --git a/libclc/clc/lib/clspv/dummy.cl b/libclc/clc/lib/clspv/dummy.cl
new file mode 100644
index 000000000000000..fab17ac780e3751
--- /dev/null
+++ b/libclc/clc/lib/clspv/dummy.cl
@@ -0,0 +1 @@
+// Empty file
diff --git a/libclc/clc/lib/clspv64 b/libclc/clc/lib/clspv64
new file mode 120000
index 000000000000000..ea01ba94bc63684
--- /dev/null
+++ b/libclc/clc/lib/clspv64
@@ -0,0 +1 @@
+clspv
\ No newline at end of file
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
new file mode 100644
index 000000000000000..fa2e4f50b99cd77
--- /dev/null
+++ b/libclc/clc/lib/generic/SOURCES
@@ -0,0 +1 @@
+geometric/clc_dot.cl
diff --git a/libclc/clc/lib/generic/geometric/clc_dot.cl b/libclc/clc/lib/generic/geometric/clc_dot.cl
new file mode 100644
index 000000000000000..bf0f19b51bc05ea
--- /dev/null
+++ b/libclc/clc/lib/generic/geometric/clc_dot.cl
@@ -0,0 +1,57 @@
+#include <clc/internal/clc.h>
+
+_CLC_OVERLOAD _CLC_DEF float __clc_dot(float p0, float p1) { return p0 * p1; }
+
+_CLC_OVERLOAD _CLC_DEF float __clc_dot(float2 p0, float2 p1) {
+  return p0.x * p1.x + p0.y * p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF float __clc_dot(float3 p0, float3 p1) {
+  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF float __clc_dot(float4 p0, float4 p1) {
+  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+}
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+_CLC_OVERLOAD _CLC_DEF double __clc_dot(double p0, double p1) {
+  return p0 * p1;
+}
+
+_CLC_OVERLOAD _CLC_DEF double __clc_dot(double2 p0, double2 p1) {
+  return p0.x * p1.x + p0.y * p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF double __clc_dot(double3 p0, double3 p1) {
+  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF double __clc_dot(double4 p0, double4 p1) {
+  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+}
+
+#endif
+
+#ifdef cl_khr_fp16
+
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+
+_CLC_OVERLOAD _CLC_DEF half __clc_dot(half p0, half p1) { return p0 * p1; }
+
+_CLC_OVERLOAD _CLC_DEF half __clc_dot(half2 p0, half2 p1) {
+  return p0.x * p1.x + p0.y * p1.y;
+}
+
+_CLC_OVERLOAD _CLC_DEF half __clc_dot(half3 p0, half3 p1) {
+  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z;
+}
+
+_CLC_OVERLOAD _CLC_DEF half __clc_dot(half4 p0, half4 p1) {
+  return p0.x * p1.x + p0.y * p1.y + p0.z * p1.z + p0.w * p1.w;
+}
+
+#endif
diff --git a/libclc/clc/lib/spirv/SOURCES b/libclc/clc/lib/spirv/SOURCES
new file mode 100644
index 000000000000000..d8effd19613c8b5
--- /dev/null
+++ b/libclc/clc/lib/spirv/SOURCES
@@ -0,0 +1,2 @@
+../generic/geometric/clc_dot.cl
+
diff --git a/libclc/clc/lib/spirv64/SOURCES b/libclc/clc/lib/spirv64/SOURCES
new file mode 100644
index 000000000000000..9200810ace38e7c
--- /dev/null
+++ b/libclc/clc/lib/spirv64/SOURCES
@@ -0,0 +1 @@
+../generic/geometric/clc_dot.cl
diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake
index f2032660ba99b0b..ee7c8500c8359fb 100644
--- a/libclc/cmake/modules/AddLibclc.cmake
+++ b/libclc/cmake/modules/AddLibclc.cmake
@@ -76,6 +76,8 @@ endfunction()
 # Links together one or more bytecode files
 #
 # Arguments:
+# * INTERNALIZE
+#     Set if the --internalize flag should be passed to llvm-link when linking
 # * TARGET <string>
 #     Custom target to create
 # * INPUT <string> ...
@@ -84,7 +86,7 @@ endfunction()
 #     List of extra dependencies to inject
 function(link_bc)
   cmake_parse_arguments(ARG
-    ""
+    "INTERNALIZE"
     "TARGET"
     "INPUTS;DEPENDENCIES"
     ${ARGN}
@@ -97,7 +99,7 @@ function(link_bc)
     file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE )
     # Turn it into a space-separate list of input files
     list( JOIN ARG_INPUTS " " RSP_INPUT )
-    file( WRITE ${RSP_FILE} ${RSP_INPUT} )
+    file( GENERATE OUTPUT ${RSP_FILE} CONTENT ${RSP_INPUT} )
     # Ensure that if this file is removed, we re-run CMake
     set_property( DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS
       ${RSP_FILE}
@@ -107,7 +109,7 @@ function(link_bc)
 
   add_custom_command(
     OUTPUT ${ARG_TARGET}.bc
-    COMMAND ${llvm-link_exe} -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG}
+    COMMAND ${llvm-link_exe} $<$<BOOL:${ARG_INTERNALIZE}>:--internalize> -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG}
     DEPENDS ${llvm-link_target} ${ARG_DEPENDENCIES} ${ARG_INPUTS} ${RSP_FILE}
   )
 
@@ -178,3 +180,251 @@ function(get_libclc_device_info)
     set( ${ARG_CLANG_TRIPLE} ${ARG_TRIPLE} PARENT_SCOPE )
   endif()
 endfunction()
+
+# Takes a list of library source files (provided by LIB_FILES/GEN_FILES),
+# compiles them to LLVM bytecode (or SPIR-V), links them together and optimizes
+# them.
+#
+# For bytecode libraries, a list of ALIASES may optionally be provided to
+# produce additional symlinks.
+#
+# Arguments:
+#  * ARCH <string>
+#      libclc architecture being built
+#  * ARCH_SUFFIX <string>
+#      libclc architecture/triple suffix
+#  * TRIPLE <string>
+#      Triple used to compile
+#
+# Optional Arguments:
+# * CLC_INTERNAL
+#     Pass if compiling the internal CLC builtin libraries, which are not
+#     optimized and do not have aliases created.
+#  * LIB_FILES <string> ...
+#      List of files that should be built for this library
+#  * GEN_FILES <string> ...
+#      List of generated files (in build dir) that should be built for this library
+#  * COMPILE_FLAGS <string> ...
+#      Compilation options (for clang)
+#  * OPT_FLAGS <string> ...
+#      Optimization options (for opt)
+#  * ALIASES <string> ...
+#      List of aliases
+#  * INTERNAL_LINK_DEPENDENCIES <string> ...
+#      A list of extra bytecode files to link into the builtin library. Symbols
+#      from these link dependencies will be internalized during linking.
+function(add_libclc_builtin_set)
+  cmake_parse_arguments(ARG
+    "CLC_INTERNAL"
+    "ARCH;TRIPLE;ARCH_SUFFIX"
+    "LIB_FILES;GEN_FILES;COMPILE_FLAGS;OPT_FLAGS;ALIASES;INTERNAL_LINK_DEPENDENCIES"
+    ${ARGN}
+  )
+
+  if( NOT ARG_ARCH OR NOT ARG_ARCH_SUFFIX OR NOT ARG_TRIPLE )
+    message( FATAL_ERROR "Must provide ARCH, ARCH_SUFFIX, and TRIPLE" )
+  endif()
+
+  set( bytecode_files "" )
+  foreach( file IN LISTS ARG_GEN_FILES ARG_LIB_FILES )
+    # We need to take each file and produce an absolute input file, as well
+    # as a unique architecture-specific output file. We deal with a mix of
+    # different input files, which makes this trickier.
+    if( ${file} IN_LIST ARG_GEN_FILES )
+      # Generated files are given just as file names, which we must make
+      # absolute to the binary directory.
+      set( input_file ${CMAKE_CURRENT_BINARY_DIR}/${file} )
+      set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${file}.bc" )
+    else()
+      # Other files are originally relative to each SOURCES file, and are
+      # then made relative to the libclc root directory. We must normalize
+      # the path (e.g., ironing out any ".."), then make it relative to the
+      # root directory again, and use that relative path component for the
+      # binary path.
+      get_filename_component( abs_path ${file} ABSOLUTE BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR} )
+      file( RELATIVE_PATH root_rel_path ${CMAKE_CURRENT_SOURCE_DIR} ${abs_path} )
+      set( input_file ${CMAKE_CURRENT_SOURCE_DIR}/${file} )
+      set( output_file "${LIBCLC_ARCH_OBJFILE_DIR}/${root_rel_path}.bc" )
+    endif()
+
+    get_filename_component( file_dir ${file} DIRECTORY )
+
+    compile_to_bc(
+      TRIPLE ${ARG_TRIPLE}
+      INPUT ${input_file}
+      OUTPUT ${output_file}
+      EXTRA_OPTS -fno-builtin -nostdlib
+        "${ARG_COMPILE_FLAGS}" -I${CMAKE_CURRENT_SOURCE_DIR}/${file_dir}
+      DEPENDENCIES generate_convert.cl clspv-generate_convert.cl
+    )
+    list( APPEND bytecode_files ${output_file} )
+  endforeach()
+
+  set( builtins_comp_lib_tgt builtins.comp.${ARG_ARCH_SUFFIX} )
+  add_custom_target( ${builtins_comp_lib_tgt}
+    DEPENDS ${bytecode_files}
+  )
+  set_target_properties( ${builtins_comp_lib_tgt} PROPERTIES FOLDER "libclc/Device IR/Comp" )
+
+  if( NOT bytecode_files )
+    message(FATAL_ERROR "Cannot create an empty builtins library")
+  endif()
+
+  set( builtins_link_lib_tgt builtins.link.${ARG_ARCH_SUFFIX} )
+
+  if( NOT ARG_INTERNAL_LINK_DEPENDENCIES )
+    link_bc(
+      TARGET ${builtins_link_lib_tgt}
+      INPUTS ${bytecode_files}
+      DEPENDENCIES ${builtins_comp_lib_tgt}
+    )
+  else()
+    # If we have libraries to link while internalizing their symbols, we need
+    # two separate link steps; the --internalize flag applies to all link
+    # inputs but the first.
+    set( builtins_link_lib_tmp_tgt builtins.link.pre-deps.${ARG_ARCH_SUFFIX} )
+    link_bc(
+      TARGET ${builtins_link_lib_tmp_tgt}
+      INPUTS ${bytecode_files}
+      DEPENDENCIES ${builtins_comp_lib_tgt}
+    )
+    link_bc(
+      INTERNALIZE
+      TARGET ${builtins_link_lib_tgt}
+      INPUTS $<TARGET_PROPERTY:${builtins_link_lib_tmp_tgt},TARGET_FILE>
+        ${ARG_INTERNAL_LINK_DEPENDENCIES}
+      DEPENDENCIES ${builtins_link_lib_tmp_tgt}
+    )
+  endif()
+
+  # For the CLC internal builtins, exit here - we only optimize the targets'
+  # entry points once we've linked the CLC builtins into them
+  if( ARG_CLC_INTERNAL )
+    return()
+  endif()
+
+  set( builtins_link_lib $<TARGET_PROPERTY:${builtins_link_lib_tgt},TARGET_FILE> )
+
+  if( ARG_ARCH STREQUAL spirv OR ARG_ARCH STREQUAL spirv64 )
+    set( spv_suffix ${ARG_ARCH_SUFFIX}.spv )
+    add_custom_command( OUTPUT ${spv_suffix}
+      COMMAND ${llvm-spirv_exe} ${spvflags} -o ${spv_suffix} ${builtins_link_lib}
+      DEPENDS ${llvm-spirv_target} ${builtins_link_lib} ${builtins_link_lib_tgt}
+    )
+    add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" )
+    set_target_properties( "prepare-${spv_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" )
+    install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix}
+       DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
+
+    return()
+  endif()
+
+  set( builtins_opt_lib_tgt builtins.opt.${ARG_ARCH_SUFFIX} )
+
+  # Add opt target
+  add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc
+    COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${builtins_opt_lib_tgt}.bc
+      ${builtins_link_lib}
+    DEPENDS ${opt_target} ${builtins_link_lib} ${builtins_link_lib_tgt}
+  )
+  add_custom_target( ${builtins_opt_lib_tgt}
+    ALL DEPENDS ${builtins_opt_lib_tgt}.bc
+  )
+  set_target_properties( ${builtins_opt_lib_tgt} PROPERTIES
+    TARGET_FILE ${CMAKE_CURRENT_BINARY_DIR}/${builtins_opt_lib_tgt}.bc
+    FOLDER "libclc/Device IR/Opt"
+  )
+
+  set( builtins_opt_lib $<TARGET_PROPERTY:${builtins_opt_lib_tgt},TARGET_FILE> )
+
+  # Add prepare target
+  set( obj_suffix ${ARG_ARCH_SUFFIX}.bc )
+  add_custom_command( OUTPUT ${obj_suffix}
+    COMMAND ${prepare_builtins_exe} -o ${obj_suffix} ${builtins_opt_lib}
+    DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} )
+  add_custom_target( prepare-${obj_suffix} ALL DEPENDS ${obj_suffix} )
+  set_target_properties( "prepare-${obj_suffix}" PROPERTIES FOLDER "libclc/Device IR/Prepare" )
+
+  # nvptx-- targets don't include workitem builtins
+  if( NOT ARG_TRIPLE MATCHES ".*ptx.*--$" )
+    add_test( NAME external-calls-${obj_suffix}
+      COMMAND ./check_external_calls.sh ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} ${LLVM_TOOLS_BINARY_DIR}
+      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} )
+  endif()
+
+  install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
+  foreach( a ${ARG_ALIASES} )
+    set( alias_suffix "${a}-${ARG_TRIPLE}.bc" )
+    add_custom_target( ${alias_suffix} ALL
+      COMMAND ${CMAKE_COMMAND} -E create_symlink ${obj_suffix} ${alias_suffix}
+      DEPENDS prepare-${obj_suffix} )
+    set_target_properties( "${alias_suffix}" PROPERTIES FOLDER "libclc/Device IR/Aliases" )
+    install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${alias_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" )
+  endforeach( a )
+endfunction(add_libclc_builtin_set)
+
+# Produces a list of libclc source files by walking over SOURCES files in a
+# given directory. Outputs the list of files in LIB_FILE_LIST.
+#
+# LIB_FILE_LIST may be pre-populated and is appended to.
+#
+# Arguments:
+# * CLC_INTERNAL
+#     Pass if compiling the internal CLC builtin libraries, which have a
+#     different directory structure.
+# * LIB_ROOT_DIR <string>
+#     Root directory containing target's lib files, relative to libclc root
+#     directory. If not provided, is set to '.'.
+# * DIRS <string> ...
+#     List of directories under LIB_ROOT_DIR to walk over searching for SOURCES
+#     files
+function(libclc_configure_lib_source LIB_FILE_LIST)
+  cmake_parse_arguments(ARG
+    "CLC_INTERNAL"
+    "LIB_ROOT_DIR"
+    "DIRS"
+    ${ARGN}
+  )
+
+  if( NOT ARG_LIB_ROOT_DIR )
+    set(ARG_LIB_ROOT_DIR  ".")
+  endif()
+
+  # Enumerate SOURCES* files
+  set( source_list )
+  foreach( l ${ARG_DIRS} )
+    foreach( s "SOURCES" "SOURCES_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}" )
+      if( ARG_CLC_INTERNAL )
+        file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/lib/${l}/${s} file_loc )
+      else()
+        file( TO_CMAKE_PATH ${ARG_LIB_ROOT_DIR}/${l}/lib/${s} file_loc )
+      endif()
+      file( TO_CMAKE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${file_loc} loc )
+      # Prepend so that later (more specialized) directories take priority
+      if( EXISTS ${loc} )
+        set( source_list ${file_loc} ${source_list} )
+      endif()
+    endforeach()
+  endforeach()
+
+  ## Seed both lists with whatever the caller pre-populated in LIB_FILE_LIST;
+  ## a SOURCES line that exactly matches a seeded entry is not added again
+  set( rel_files ${${LIB_FILE_LIST}} ) # Source directory input files, relative to the root dir
+  set( objects ${${LIB_FILE_LIST}} )   # A "set" of already-added input files
+
+  foreach( l ${source_list} )
+    file( READ ${l} file_list )
+    # Quoted so an empty SOURCES file still yields a valid (empty) list
+    string( REPLACE "\n" ";" file_list "${file_list}" )
+    get_filename_component( dir ${l} DIRECTORY )
+    foreach( f ${file_list} )
+      # Only add each file once, so that targets can 'specialize' builtins
+      if( NOT ${f} IN_LIST objects )
+        list( APPEND objects ${f} )
+        list( APPEND rel_files ${dir}/${f} )
+      endif()
+    endforeach()
+  endforeach()
+
+  set( ${LIB_FILE_LIST} ${rel_files} PARENT_SCOPE )
+endfunction(libclc_configure_lib_source LIB_FILE_LIST)
diff --git a/libclc/generic/lib/geometric/dot.cl b/libclc/generic/lib/geometric/dot.cl
index e58bc26f4333a7e..e790d02636563cd 100644
--- a/libclc/generic/lib/geometric/dot.cl
+++ b/libclc/generic/lib/geometric/dot.cl
@@ -1,19 +1,20 @@
 #include <clc/clc.h>
+#include <clc/geometric/clc_dot.h>
 
 _CLC_OVERLOAD _CLC_DEF float dot(float p0, float p1) {
-  return p0*p1;
+  return __clc_dot(p0, p1);
 }
 
 _CLC_OVERLOAD _CLC_DEF float dot(float2 p0, float2 p1) {
-  return p0.x*p1.x + p0.y*p1.y;
+  return __clc_dot(p0, p1);
 }
 
 _CLC_OVERLOAD _CLC_DEF float dot(float3 p0, float3 p1) {
-  return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+  return __clc_dot(p0, p1);
 }
 
 _CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
-  return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+  return __clc_dot(p0, p1);
 }
 
 #ifdef cl_khr_fp64
@@ -21,19 +22,19 @@ _CLC_OVERLOAD _CLC_DEF float dot(float4 p0, float4 p1) {
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 _CLC_OVERLOAD _CLC_DEF double dot(double p0, double p1) {
-  return p0*p1;
+  return __clc_dot(p0, p1);
 }
 
 _CLC_OVERLOAD _CLC_DEF double dot(double2 p0, double2 p1) {
-  return p0.x*p1.x + p0.y*p1.y;
+  return __clc_dot(p0, p1);
 }
 
 _CLC_OVERLOAD _CLC_DEF double dot(double3 p0, double3 p1) {
-  return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+  return __clc_dot(p0, p1);
 }
 
 _CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
-  return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+  return __clc_dot(p0, p1);
 }
 
 #endif
@@ -42,20 +43,18 @@ _CLC_OVERLOAD _CLC_DEF double dot(double4 p0, double4 p1) {
 
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
-_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) {
-  return p0*p1;
-}
+_CLC_OVERLOAD _CLC_DEF half dot(half p0, half p1) { return __clc_dot(p0, p1); }
 
 _CLC_OVERLOAD _CLC_DEF half dot(half2 p0, half2 p1) {
-  return p0.x*p1.x + p0.y*p1.y;
+  return __clc_dot(p0, p1);
 }
 
 _CLC_OVERLOAD _CLC_DEF half dot(half3 p0, half3 p1) {
-  return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z;
+  return __clc_dot(p0, p1);
 }
 
 _CLC_OVERLOAD _CLC_DEF half dot(half4 p0, half4 p1) {
-  return p0.x*p1.x + p0.y*p1.y + p0.z*p1.z + p0.w*p1.w;
+  return __clc_dot(p0, p1);
 }
 
 #endif
diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index 574b262018cd3a5..95a7d10f055ea70 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -45,10 +45,6 @@ include(CMakeDependentOption)
 include(HandleCompilerRT)
 
 # Basic options ---------------------------------------------------------------
-option(LIBCXX_ENABLE_ASSERTIONS
-  "Enable assertions inside the compiled library, and at the same time make it the
-   default when compiling user code. Note that assertions can be enabled or disabled
-   by users in their own code regardless of this option." OFF)
 option(LIBCXX_ENABLE_SHARED "Build libc++ as a shared library." ON)
 option(LIBCXX_ENABLE_STATIC "Build libc++ as a static library." ON)
 option(LIBCXX_ENABLE_FILESYSTEM
@@ -759,9 +755,9 @@ config_define_if_not(LIBCXX_ENABLE_WIDE_CHARACTERS _LIBCPP_HAS_NO_WIDE_CHARACTER
 config_define_if_not(LIBCXX_ENABLE_TIME_ZONE_DATABASE _LIBCPP_HAS_NO_TIME_ZONE_DATABASE)
 config_define_if_not(LIBCXX_ENABLE_VENDOR_AVAILABILITY_ANNOTATIONS _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS)
 
+# TODO: Remove in LLVM 21. We're leaving an error to make this fail explicitly.
 if (LIBCXX_ENABLE_ASSERTIONS)
-  message(DEPRECATION "LIBCXX_ENABLE_ASSERTIONS is deprecated and will be removed in LLVM 20. Please use LIBCXX_HARDENING_MODE instead.")
-  set(LIBCXX_HARDENING_MODE "extensive")
+  message(FATAL_ERROR "LIBCXX_ENABLE_ASSERTIONS has been removed. Please use LIBCXX_HARDENING_MODE instead.")
 endif()
 if (LIBCXX_HARDENING_MODE STREQUAL "none")
   config_define(2 _LIBCPP_HARDENING_MODE_DEFAULT)
diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst
index 05b08da52153501..db24b65caca6c0a 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -264,6 +264,8 @@ Status
     ---------------------------------------------------------- -----------------
     ``__cpp_lib_move_iterator_concept``                        ``202207L``
     ---------------------------------------------------------- -----------------
+    ``__cpp_lib_optional``                                     ``202106L``
+    ---------------------------------------------------------- -----------------
     ``__cpp_lib_polymorphic_allocator``                        ``201902L``
     ---------------------------------------------------------- -----------------
     ``__cpp_lib_ranges``                                       ``202110L``
@@ -300,6 +302,8 @@ Status
     ---------------------------------------------------------- -----------------
     ``__cpp_lib_unwrap_ref``                                   ``201811L``
     ---------------------------------------------------------- -----------------
+    ``__cpp_lib_variant``                                      ``202106L``
+    ---------------------------------------------------------- -----------------
     **C++23**
     ----------------------------------------------------------------------------
     ``__cpp_lib_adaptor_iterator_pair_constructor``            ``202106L``
@@ -491,5 +495,7 @@ Status
     ``__cpp_lib_to_string``                                    *unimplemented*
     ---------------------------------------------------------- -----------------
     ``__cpp_lib_tuple_like``                                   *unimplemented*
+    ---------------------------------------------------------- -----------------
+    ``__cpp_lib_variant``                                      ``202306L``
     ========================================================== =================
 
diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index 39546493ae8d6fd..bf3aafe6139ee95 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -38,11 +38,13 @@ What's New in Libc++ 20.0.0?
 Implemented Papers
 ------------------
 
+- P0619R4: Reviewing Deprecated Facilities of C++17 for C++20 (`Github <https://github.com/llvm/llvm-project/issues/99985>`__)
 - P2747R2: ``constexpr`` placement new (`Github <https://github.com/llvm/llvm-project/issues/105427>`__)
 - P2609R3: Relaxing Ranges Just A Smidge (`Github <https://github.com/llvm/llvm-project/issues/105253>`__)
 - P2985R0: A type trait for detecting virtual base classes (`Github <https://github.com/llvm/llvm-project/issues/105432>`__)
 - ``std::jthread`` and ``<stop_token>`` are not guarded behind ``-fexperimental-library`` anymore
 - P2674R1: A trait for implicit lifetime types (`Github <https://github.com/llvm/llvm-project/issues/105259>`__)
+- P0429R9: A Standard ``flat_map`` is partially implemented: ``flat_map`` is provided (`Github <https://github.com/llvm/llvm-project/issues/105190>`__)
 
 Improvements and New Features
 -----------------------------
@@ -63,8 +65,9 @@ Improvements and New Features
 Deprecations and Removals
 -------------------------
 
-- TODO: The ``LIBCXX_ENABLE_ASSERTIONS`` CMake variable and the ``_LIBCPP_ENABLE_ASSERTIONS`` macro that were used to
-  enable the safe mode will be removed in LLVM 20.
+- The ``LIBCXX_ENABLE_ASSERTIONS`` CMake variable and the ``_LIBCPP_ENABLE_ASSERTIONS`` macro that were used to
+  enable the safe mode have been removed in LLVM 20. Please use :ref:`support for hardening <using-hardening-modes>`
+  instead.
 
 - Support for the C++20 synchronization library (``<barrier>``, ``<latch>``, ``atomic::wait``, etc.) has been
   removed in language modes prior to C++20. If you are using these features prior to C++20, you will need to
@@ -87,15 +90,14 @@ Deprecations and Removals
   the ``_LIBCPP_VERBOSE_ABORT_NOT_NOEXCEPT`` macro can be defined to make the function non-``noexcept``. That macro
   will be removed in LLVM 21.
 
+- ``<ccomplex>``, ``<cstdalign>`` (previously missing), ``<cstdbool>``, and ``<ctgmath>`` are deprecated since C++17 as
+  specified by the standard. They, together with ``<ciso646>``, are removed in C++20, but libc++ still provides these
+  headers as an extension and only deprecates them. The ``_LIBCPP_DISABLE_DEPRECATION_WARNINGS`` macro can be defined to
+  suppress deprecation for these headers.
+
 Upcoming Deprecations and Removals
 ----------------------------------
 
-LLVM 20
-~~~~~~~
-
-- TODO
-
-
 LLVM 21
 ~~~~~~~
 
diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv
index cc75d28f14aac20..5cd77be4d58def7 100644
--- a/libcxx/docs/Status/Cxx20Papers.csv
+++ b/libcxx/docs/Status/Cxx20Papers.csv
@@ -34,7 +34,7 @@
 "`P0528R3 <https://wg21.link/P0528R3>`__","The Curious Case of Padding Bits, Featuring Atomic Compare-and-Exchange","2018-06 (Rapperswil)","","",""
 "`P0542R5 <https://wg21.link/P0542R5>`__","Support for contract based programming in C++","2018-06 (Rapperswil)","|Nothing To Do|","n/a","Pulled at the 2019-07 meeting in Cologne"
 "`P0556R3 <https://wg21.link/P0556R3>`__","Integral power-of-2 operations","2018-06 (Rapperswil)","|Complete|","9.0",""
-"`P0619R4 <https://wg21.link/P0619R4>`__","Reviewing Deprecated Facilities of C++17 for C++20","2018-06 (Rapperswil)","|Partial|","","Only sections D.7, D.8, D.9, D.10, D.11, D.12, and D.13 are implemented. Section D.4 remains undone."
+"`P0619R4 <https://wg21.link/P0619R4>`__","Reviewing Deprecated Facilities of C++17 for C++20","2018-06 (Rapperswil)","|Complete|","20.0","Removed headers are still provided as an extension, but with deprecation warnings"
 "`P0646R1 <https://wg21.link/P0646R1>`__","Improving the Return Value of Erase-Like Algorithms","2018-06 (Rapperswil)","|Complete|","10.0",""
 "`P0722R3 <https://wg21.link/P0722R3>`__","Efficient sized delete for variable sized classes","2018-06 (Rapperswil)","|Complete|","9.0",""
 "`P0758R1 <https://wg21.link/P0758R1>`__","Implicit conversion traits and utility functions","2018-06 (Rapperswil)","|Complete|","",""
@@ -192,7 +192,7 @@
 "`P2106R0 <https://wg21.link/P2106R0>`__","Alternative wording for GB315 and GB316","2020-02 (Prague)","|Complete|","15.0",""
 "`P2116R0 <https://wg21.link/P2116R0>`__","Remove tuple-like protocol support from fixed-extent span","2020-02 (Prague)","|Complete|","11.0",""
 "","","","","",""
-"`P2231R1 <https://wg21.link/P2231R1>`__","Missing constexpr in std::optional and std::variant","2021-06 (Virtual)","|Complete|","19.0",""
+"`P2231R1 <https://wg21.link/P2231R1>`__","Missing constexpr in std::optional and std::variant","2021-06 (Virtual)","|Complete|","19.0","Changes of feature-test macros are completed in LLVM 20."
 "`P2325R3 <https://wg21.link/P2325R3>`__","Views should not be required to be default constructible","2021-06 (Virtual)","|Complete|","16.0",""
 "`P2210R2 <https://wg21.link/P2210R2>`__","Superior String Splitting","2021-06 (Virtual)","|Complete|","16.0",""
 "`P2216R3 <https://wg21.link/P2216R3>`__","std::format improvements","2021-06 (Virtual)","|Complete|","15.0",""
diff --git a/libcxx/docs/Status/Cxx23Papers.csv b/libcxx/docs/Status/Cxx23Papers.csv
index c64f1c4171fce1a..6f1626da73507ee 100644
--- a/libcxx/docs/Status/Cxx23Papers.csv
+++ b/libcxx/docs/Status/Cxx23Papers.csv
@@ -52,7 +52,7 @@
 "`P2443R1 <https://wg21.link/P2443R1>`__","``views::chunk_by``","2022-02 (Virtual)","|Complete|","18.0",""
 "","","","","",""
 "`P0009R18 <https://wg21.link/P0009R18>`__","mdspan: A Non-Owning Multidimensional Array Reference","2022-07 (Virtual)","|Complete|","18.0",""
-"`P0429R9 <https://wg21.link/P0429R9>`__","A Standard ``flat_map``","2022-07 (Virtual)","","",""
+"`P0429R9 <https://wg21.link/P0429R9>`__","A Standard ``flat_map``","2022-07 (Virtual)","|In progress|","",""
 "`P1169R4 <https://wg21.link/P1169R4>`__","``static operator()``","2022-07 (Virtual)","|Complete|","16.0",""
 "`P1222R4 <https://wg21.link/P1222R4>`__","A Standard ``flat_set``","2022-07 (Virtual)","","",""
 "`P1223R5 <https://wg21.link/P1223R5>`__","``ranges::find_last()``, ``ranges::find_last_if()``, and ``ranges::find_last_if_not()``","2022-07 (Virtual)","|Complete|","19.0",""
diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv
index 8864b1ebe288910..d5d5cdda065ae10 100644
--- a/libcxx/docs/Status/Cxx2cPapers.csv
+++ b/libcxx/docs/Status/Cxx2cPapers.csv
@@ -17,7 +17,7 @@
 "`P0792R14 <https://wg21.link/P0792R14>`__","``function_ref``: a type-erased callable reference","2023-06 (Varna)","","",""
 "`P2874R2 <https://wg21.link/P2874R2>`__","Mandating Annex D Require No More","2023-06 (Varna)","","",""
 "`P2757R3 <https://wg21.link/P2757R3>`__","Type-checking format args","2023-06 (Varna)","","",""
-"`P2637R3 <https://wg21.link/P2637R3>`__","Member ``visit``","2023-06 (Varna)","|Complete|","19.0",""
+"`P2637R3 <https://wg21.link/P2637R3>`__","Member ``visit``","2023-06 (Varna)","|Complete|","19.0","Change of ``__cpp_lib_variant`` is completed in LLVM 20. Change of ``__cpp_lib_format`` is blocked by `P2419R2 <https://wg21.link/P2419R2>`__."
 "`P2641R4 <https://wg21.link/P2641R4>`__","Checking if a ``union`` alternative is active","2023-06 (Varna)","","",""
 "`P1759R6 <https://wg21.link/P1759R6>`__","Native handles and file streams","2023-06 (Varna)","|Complete|","18.0",""
 "`P2697R1 <https://wg21.link/P2697R1>`__","Interfacing ``bitset`` with ``string_view``","2023-06 (Varna)","|Complete|","18.0",""
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 975adc03ec81da0..87eaf64b2450171 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -358,6 +358,8 @@ set(files
   __filesystem/recursive_directory_iterator.h
   __filesystem/space_info.h
   __filesystem/u8path.h
+  __flat_map/flat_map.h
+  __flat_map/sorted_unique.h
   __format/buffer.h
   __format/concepts.h
   __format/container_adaptor.h
@@ -492,15 +494,16 @@ set(files
   __locale
   __locale_dir/locale_base_api.h
   __locale_dir/locale_base_api/android.h
+  __locale_dir/locale_base_api/apple.h
   __locale_dir/locale_base_api/bsd_locale_defaults.h
   __locale_dir/locale_base_api/bsd_locale_fallbacks.h
+  __locale_dir/locale_base_api/freebsd.h
   __locale_dir/locale_base_api/fuchsia.h
   __locale_dir/locale_base_api/ibm.h
-  __locale_dir/locale_base_api/locale_guard.h
   __locale_dir/locale_base_api/musl.h
-  __locale_dir/locale_base_api/newlib.h
   __locale_dir/locale_base_api/openbsd.h
   __locale_dir/locale_base_api/win32.h
+  __locale_dir/locale_guard.h
   __math/abs.h
   __math/copysign.h
   __math/error_functions.h
@@ -920,6 +923,7 @@ set(files
   coroutine
   csetjmp
   csignal
+  cstdalign
   cstdarg
   cstdbool
   cstddef
@@ -958,6 +962,7 @@ set(files
   ext/hash_set
   fenv.h
   filesystem
+  flat_map
   float.h
   format
   forward_list
diff --git a/libcxx/include/__algorithm/comp.h b/libcxx/include/__algorithm/comp.h
index 1f38f5d2d99b43c..ab3c598418828af 100644
--- a/libcxx/include/__algorithm/comp.h
+++ b/libcxx/include/__algorithm/comp.h
@@ -42,6 +42,9 @@ struct __less<void, void> {
   }
 };
 
+template <class _Tp>
+inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true;
+
 template <class _Tp>
 inline const bool __desugars_to_v<__totally_ordered_less_tag, __less<>, _Tp, _Tp> = is_integral<_Tp>::value;
 
diff --git a/libcxx/include/__algorithm/ranges_minmax.h b/libcxx/include/__algorithm/ranges_minmax.h
index 4f2b2bf26382da3..5f2e5cb2a1eeab7 100644
--- a/libcxx/include/__algorithm/ranges_minmax.h
+++ b/libcxx/include/__algorithm/ranges_minmax.h
@@ -89,7 +89,7 @@ struct __minmax {
     // vectorize the code.
     if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> &&
                   __is_cheap_to_copy<_ValueT> & __is_identity<_Proj>::value &&
-                  __desugars_to_v<__totally_ordered_less_tag, _Comp, _ValueT, _ValueT>) {
+                  __desugars_to_v<__less_tag, _Comp, _ValueT, _ValueT>) {
       minmax_result<_ValueT> __result = {__r[0], __r[0]};
       for (auto __e : __r) {
         if (__e < __result.min)
diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h
index 0b2137dee2f77e3..39868b8b6a30aef 100644
--- a/libcxx/include/__algorithm/sort.h
+++ b/libcxx/include/__algorithm/sort.h
@@ -27,11 +27,13 @@
 #include <__functional/ranges_operations.h>
 #include <__iterator/iterator_traits.h>
 #include <__type_traits/conditional.h>
+#include <__type_traits/desugars_to.h>
 #include <__type_traits/disjunction.h>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/is_arithmetic.h>
 #include <__type_traits/is_constant_evaluated.h>
 #include <__type_traits/is_same.h>
+#include <__type_traits/is_trivially_copyable.h>
 #include <__type_traits/remove_cvref.h>
 #include <__utility/move.h>
 #include <__utility/pair.h>
@@ -47,110 +49,11 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-// stable, 2-3 compares, 0-2 swaps
-
-template <class _AlgPolicy, class _Compare, class _ForwardIterator>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned
-__sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) {
-  using _Ops = _IterOps<_AlgPolicy>;
-
-  unsigned __r = 0;
-  if (!__c(*__y, *__x)) // if x <= y
-  {
-    if (!__c(*__z, *__y))      // if y <= z
-      return __r;              // x <= y && y <= z
-                               // x <= y && y > z
-    _Ops::iter_swap(__y, __z); // x <= z && y < z
-    __r = 1;
-    if (__c(*__y, *__x)) // if x > y
-    {
-      _Ops::iter_swap(__x, __y); // x < y && y <= z
-      __r = 2;
-    }
-    return __r; // x <= y && y < z
-  }
-  if (__c(*__z, *__y)) // x > y, if y > z
-  {
-    _Ops::iter_swap(__x, __z); // x < y && y < z
-    __r = 1;
-    return __r;
-  }
-  _Ops::iter_swap(__x, __y); // x > y && y <= z
-  __r = 1;                   // x < y && x <= z
-  if (__c(*__z, *__y))       // if y > z
-  {
-    _Ops::iter_swap(__y, __z); // x <= y && y < z
-    __r = 2;
-  }
-  return __r;
-} // x <= y && y <= z
-
-// stable, 3-6 compares, 0-5 swaps
-
-template <class _AlgPolicy, class _Compare, class _ForwardIterator>
-_LIBCPP_HIDE_FROM_ABI void
-__sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, _Compare __c) {
-  using _Ops = _IterOps<_AlgPolicy>;
-  std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
-  if (__c(*__x4, *__x3)) {
-    _Ops::iter_swap(__x3, __x4);
-    if (__c(*__x3, *__x2)) {
-      _Ops::iter_swap(__x2, __x3);
-      if (__c(*__x2, *__x1)) {
-        _Ops::iter_swap(__x1, __x2);
-      }
-    }
-  }
-}
-
-// stable, 4-10 compares, 0-9 swaps
-
-template <class _AlgPolicy, class _Comp, class _ForwardIterator>
-_LIBCPP_HIDE_FROM_ABI void
-__sort5(_ForwardIterator __x1,
-        _ForwardIterator __x2,
-        _ForwardIterator __x3,
-        _ForwardIterator __x4,
-        _ForwardIterator __x5,
-        _Comp __comp) {
-  using _Ops = _IterOps<_AlgPolicy>;
-
-  std::__sort4<_AlgPolicy, _Comp>(__x1, __x2, __x3, __x4, __comp);
-  if (__comp(*__x5, *__x4)) {
-    _Ops::iter_swap(__x4, __x5);
-    if (__comp(*__x4, *__x3)) {
-      _Ops::iter_swap(__x3, __x4);
-      if (__comp(*__x3, *__x2)) {
-        _Ops::iter_swap(__x2, __x3);
-        if (__comp(*__x2, *__x1)) {
-          _Ops::iter_swap(__x1, __x2);
-        }
-      }
-    }
-  }
-}
-
-// The comparator being simple is a prerequisite for using the branchless optimization.
-template <class _Tp>
-struct __is_simple_comparator : false_type {};
-template <>
-struct __is_simple_comparator<__less<>&> : true_type {};
-template <class _Tp>
-struct __is_simple_comparator<less<_Tp>&> : true_type {};
-template <class _Tp>
-struct __is_simple_comparator<greater<_Tp>&> : true_type {};
-#if _LIBCPP_STD_VER >= 20
-template <>
-struct __is_simple_comparator<ranges::less&> : true_type {};
-template <>
-struct __is_simple_comparator<ranges::greater&> : true_type {};
-#endif
-
 template <class _Compare, class _Iter, class _Tp = typename iterator_traits<_Iter>::value_type>
-using __use_branchless_sort =
-    integral_constant<bool,
-                      __libcpp_is_contiguous_iterator<_Iter>::value && sizeof(_Tp) <= sizeof(void*) &&
-                          is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>;
+inline const bool __use_branchless_sort =
+    __libcpp_is_contiguous_iterator<_Iter>::value && __is_cheap_to_copy<_Tp> && is_arithmetic<_Tp>::value &&
+    (__desugars_to_v<__less_tag, __remove_cvref_t<_Compare>, _Tp, _Tp> ||
+     __desugars_to_v<__greater_tag, __remove_cvref_t<_Compare>, _Tp, _Tp>);
 
 namespace __detail {
 
@@ -161,59 +64,88 @@ enum { __block_size = sizeof(uint64_t) * 8 };
 
 // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary.
 template <class _Compare, class _RandomAccessIterator>
-inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) {
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
+__cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) {
   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
   bool __r         = __c(*__x, *__y);
   value_type __tmp = __r ? *__x : *__y;
   *__y             = __r ? *__y : *__x;
   *__x             = __tmp;
+  return !__r;
 }
 
 // Ensures that *__x, *__y and *__z are ordered according to the comparator __c,
 // under the assumption that *__y and *__z are already ordered.
 template <class _Compare, class _RandomAccessIterator>
-inline _LIBCPP_HIDE_FROM_ABI void
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
 __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) {
   // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`).
   using value_type = typename iterator_traits<_RandomAccessIterator>::value_type;
-  bool __r         = __c(*__z, *__x);
-  value_type __tmp = __r ? *__z : *__x;
-  *__z             = __r ? *__x : *__z;
-  __r              = __c(__tmp, *__y);
-  *__x             = __r ? *__x : *__y;
-  *__y             = __r ? *__y : __tmp;
+  bool __r1        = __c(*__z, *__x);
+  value_type __tmp = __r1 ? *__z : *__x;
+  *__z             = __r1 ? *__x : *__z;
+  bool __r2        = __c(__tmp, *__y);
+  *__x             = __r2 ? *__x : *__y;
+  *__y             = __r2 ? *__y : __tmp;
+  return __r1 || !__r2; // nothing is moved only if !__c(*__z, *__x) && __c(*__x, *__y)
 }
 
+// stable, 2-3 compares, 0-2 swaps
+
 template <class,
           class _Compare,
           class _RandomAccessIterator,
-          __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless(
-    _RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) {
-  std::__cond_swap<_Compare>(__x2, __x3, __c);
-  std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c);
+          __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
+__sort3(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) {
+  bool __swapped1 = std::__cond_swap<_Compare>(__x2, __x3, __c);
+  bool __swapped2 = std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c);
+  return __swapped1 || __swapped2;
 }
 
 template <class _AlgPolicy,
           class _Compare,
           class _RandomAccessIterator,
-          __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless(
-    _RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) {
-  std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
-}
+          __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool
+__sort3(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) {
+  using _Ops = _IterOps<_AlgPolicy>;
+
+  if (!__c(*__y, *__x)) // if x <= y
+  {
+    if (!__c(*__z, *__y))        // if y <= z
+      return false;              // x <= y && y <= z
+                                 // x <= y && y > z
+    _Ops::iter_swap(__y, __z);   // x <= z && y < z
+    if (__c(*__y, *__x))         // if x > y
+      _Ops::iter_swap(__x, __y); // x < y && y <= z
+    return true;                 // x <= y && y < z
+  }
+  if (__c(*__z, *__y)) // x > y, if y > z
+  {
+    _Ops::iter_swap(__x, __z); // x < y && y < z
+    return true;
+  }
+  _Ops::iter_swap(__x, __y); // x > y && y <= z
+  // x < y && x <= z
+  if (__c(*__z, *__y))         // if y > z
+    _Ops::iter_swap(__y, __z); // x <= y && y < z
+  return true;
+} // x <= y && y <= z
+
+// stable, 3-6 compares, 0-5 swaps
 
 template <class,
           class _Compare,
           class _RandomAccessIterator,
-          __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless(
-    _RandomAccessIterator __x1,
-    _RandomAccessIterator __x2,
-    _RandomAccessIterator __x3,
-    _RandomAccessIterator __x4,
-    _Compare __c) {
+          __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI void
+__sort4(_RandomAccessIterator __x1,
+        _RandomAccessIterator __x2,
+        _RandomAccessIterator __x3,
+        _RandomAccessIterator __x4,
+        _Compare __c) {
   std::__cond_swap<_Compare>(__x1, __x3, __c);
   std::__cond_swap<_Compare>(__x2, __x4, __c);
   std::__cond_swap<_Compare>(__x1, __x2, __c);
@@ -224,27 +156,39 @@ inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless(
 template <class _AlgPolicy,
           class _Compare,
           class _RandomAccessIterator,
-          __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless(
-    _RandomAccessIterator __x1,
-    _RandomAccessIterator __x2,
-    _RandomAccessIterator __x3,
-    _RandomAccessIterator __x4,
-    _Compare __c) {
-  std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c);
+          __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI void
+__sort4(_RandomAccessIterator __x1,
+        _RandomAccessIterator __x2,
+        _RandomAccessIterator __x3,
+        _RandomAccessIterator __x4,
+        _Compare __c) {
+  using _Ops = _IterOps<_AlgPolicy>;
+  std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c);
+  if (__c(*__x4, *__x3)) {
+    _Ops::iter_swap(__x3, __x4);
+    if (__c(*__x3, *__x2)) {
+      _Ops::iter_swap(__x2, __x3);
+      if (__c(*__x2, *__x1)) {
+        _Ops::iter_swap(__x1, __x2);
+      }
+    }
+  }
 }
 
+// stable, 4-10 compares, 0-9 swaps
+
 template <class _AlgPolicy,
           class _Compare,
           class _RandomAccessIterator,
-          __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless(
-    _RandomAccessIterator __x1,
-    _RandomAccessIterator __x2,
-    _RandomAccessIterator __x3,
-    _RandomAccessIterator __x4,
-    _RandomAccessIterator __x5,
-    _Compare __c) {
+          __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI void
+__sort5(_RandomAccessIterator __x1,
+        _RandomAccessIterator __x2,
+        _RandomAccessIterator __x3,
+        _RandomAccessIterator __x4,
+        _RandomAccessIterator __x5,
+        _Compare __c) {
   std::__cond_swap<_Compare>(__x1, __x2, __c);
   std::__cond_swap<_Compare>(__x4, __x5, __c);
   std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c);
@@ -256,16 +200,29 @@ inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless(
 template <class _AlgPolicy,
           class _Compare,
           class _RandomAccessIterator,
-          __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>::value, int> = 0>
-inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless(
-    _RandomAccessIterator __x1,
-    _RandomAccessIterator __x2,
-    _RandomAccessIterator __x3,
-    _RandomAccessIterator __x4,
-    _RandomAccessIterator __x5,
-    _Compare __c) {
-  std::__sort5<_AlgPolicy, _Compare, _RandomAccessIterator>(
-      std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __c);
+          __enable_if_t<!__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI void
+__sort5(_RandomAccessIterator __x1,
+        _RandomAccessIterator __x2,
+        _RandomAccessIterator __x3,
+        _RandomAccessIterator __x4,
+        _RandomAccessIterator __x5,
+        _Compare __comp) {
+  using _Ops = _IterOps<_AlgPolicy>;
+
+  std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __comp);
+  if (__comp(*__x5, *__x4)) {
+    _Ops::iter_swap(__x4, __x5);
+    if (__comp(*__x4, *__x3)) {
+      _Ops::iter_swap(__x3, __x4);
+      if (__comp(*__x3, *__x2)) {
+        _Ops::iter_swap(__x2, __x3);
+        if (__comp(*__x2, *__x1)) {
+          _Ops::iter_swap(__x1, __x2);
+        }
+      }
+    }
+  }
 }
 
 // Assumes size > 0
@@ -355,14 +312,14 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator
       _Ops::iter_swap(__first, __last);
     return true;
   case 3:
-    std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp);
+    std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp);
     return true;
   case 4:
-    std::__sort4_maybe_branchless<_AlgPolicy, _Comp>(
+    std::__sort4<_AlgPolicy, _Comp>(
         __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
     return true;
   case 5:
-    std::__sort5_maybe_branchless<_AlgPolicy, _Comp>(
+    std::__sort5<_AlgPolicy, _Comp>(
         __first,
         __first + difference_type(1),
         __first + difference_type(2),
@@ -373,7 +330,7 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator
   }
   typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type;
   _RandomAccessIterator __j = __first + difference_type(2);
-  std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp);
+  std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp);
   const unsigned __limit = 8;
   unsigned __count       = 0;
   for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) {
@@ -780,14 +737,14 @@ void __introsort(_RandomAccessIterator __first,
         _Ops::iter_swap(__first, __last);
       return;
     case 3:
-      std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
+      std::__sort3<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp);
       return;
     case 4:
-      std::__sort4_maybe_branchless<_AlgPolicy, _Compare>(
+      std::__sort4<_AlgPolicy, _Compare>(
           __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp);
       return;
     case 5:
-      std::__sort5_maybe_branchless<_AlgPolicy, _Compare>(
+      std::__sort5<_AlgPolicy, _Compare>(
           __first,
           __first + difference_type(1),
           __first + difference_type(2),
@@ -928,10 +885,8 @@ __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co
   // Only use bitset partitioning for arithmetic types.  We should also check
   // that the default comparator is in use so that we are sure that there are no
   // branches in the comparator.
-  std::__introsort<_AlgPolicy,
-                   _Comp&,
-                   _RandomAccessIterator,
-                   __use_branchless_sort<_Comp, _RandomAccessIterator>::value>(__first, __last, __comp, __depth_limit);
+  std::__introsort<_AlgPolicy, _Comp&, _RandomAccessIterator, __use_branchless_sort<_Comp, _RandomAccessIterator> >(
+      __first, __last, __comp, __depth_limit);
 }
 
 template <class _Type, class... _Options>
diff --git a/libcxx/include/__config b/libcxx/include/__config
index fc09a97274d7c78..1cf80a46686ab91 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -39,16 +39,9 @@
 
 // HARDENING {
 
-// This is for backward compatibility -- make enabling `_LIBCPP_ENABLE_ASSERTIONS` (which predates hardening modes)
-// equivalent to setting the extensive mode. This is deprecated and will be removed in LLVM 20.
+// TODO: Remove in LLVM 21. We're making this an error to catch folks who might not have migrated.
 #  ifdef _LIBCPP_ENABLE_ASSERTIONS
-#    warning "_LIBCPP_ENABLE_ASSERTIONS is deprecated, please use _LIBCPP_HARDENING_MODE instead"
-#    if _LIBCPP_ENABLE_ASSERTIONS != 0 && _LIBCPP_ENABLE_ASSERTIONS != 1
-#      error "_LIBCPP_ENABLE_ASSERTIONS must be set to 0 or 1"
-#    endif
-#    if _LIBCPP_ENABLE_ASSERTIONS
-#      define _LIBCPP_HARDENING_MODE _LIBCPP_HARDENING_MODE_EXTENSIVE
-#    endif
+#    error "_LIBCPP_ENABLE_ASSERTIONS has been removed, please use _LIBCPP_HARDENING_MODE instead"
 #  endif
 
 // The library provides the macro `_LIBCPP_HARDENING_MODE` which can be set to one of the following values:
diff --git a/libcxx/include/__flat_map/flat_map.h b/libcxx/include/__flat_map/flat_map.h
new file mode 100644
index 000000000000000..9ca32d5295bd27d
--- /dev/null
+++ b/libcxx/include/__flat_map/flat_map.h
@@ -0,0 +1,1359 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FLAT_MAP_FLAT_MAP_H
+#define _LIBCPP___FLAT_MAP_FLAT_MAP_H
+
+#include <__algorithm/lexicographical_compare_three_way.h>
+#include <__algorithm/ranges_adjacent_find.h>
+#include <__algorithm/ranges_equal.h>
+#include <__algorithm/ranges_inplace_merge.h>
+#include <__algorithm/ranges_lower_bound.h>
+#include <__algorithm/ranges_partition_point.h>
+#include <__algorithm/ranges_stable_sort.h>
+#include <__algorithm/ranges_unique.h>
+#include <__algorithm/ranges_upper_bound.h>
+#include <__compare/synth_three_way.h>
+#include <__concepts/convertible_to.h>
+#include <__concepts/swappable.h>
+#include <__config>
+#include <__flat_map/sorted_unique.h>
+#include <__functional/invoke.h>
+#include <__functional/is_transparent.h>
+#include <__functional/operations.h>
+#include <__iterator/concepts.h>
+#include <__iterator/distance.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/next.h>
+#include <__iterator/ranges_iterator_traits.h>
+#include <__iterator/reverse_iterator.h>
+#include <__memory/allocator_traits.h>
+#include <__memory/uses_allocator.h>
+#include <__memory/uses_allocator_construction.h>
+#include <__ranges/concepts.h>
+#include <__ranges/container_compatible_range.h>
+#include <__ranges/drop_view.h>
+#include <__ranges/ref_view.h>
+#include <__ranges/subrange.h>
+#include <__ranges/zip_view.h>
+#include <__type_traits/conjunction.h>
+#include <__type_traits/container_traits.h>
+#include <__type_traits/invoke.h>
+#include <__type_traits/is_allocator.h>
+#include <__type_traits/is_nothrow_constructible.h>
+#include <__type_traits/is_same.h>
+#include <__type_traits/maybe_const.h>
+#include <__utility/exception_guard.h>
+#include <__utility/pair.h>
+#include <initializer_list>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+#if _LIBCPP_STD_VER >= 23
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _Key,
+          class _Tp,
+          class _Compare         = less<_Key>,
+          class _KeyContainer    = vector<_Key>,
+          class _MappedContainer = vector<_Tp>>
+class flat_map {
+  template <bool _Const>
+  struct __iterator;
+
+  template <class, class, class, class, class>
+  friend class flat_map;
+
+  static_assert(is_same_v<_Key, typename _KeyContainer::value_type>);
+  static_assert(is_same_v<_Tp, typename _MappedContainer::value_type>);
+  static_assert(!is_same_v<_KeyContainer, std::vector<bool>>, "vector<bool> is not a sequence container");
+  static_assert(!is_same_v<_MappedContainer, std::vector<bool>>, "vector<bool> is not a sequence container");
+
+public:
+  // types
+  using key_type               = _Key;
+  using mapped_type            = _Tp;
+  using value_type             = pair<key_type, mapped_type>;
+  using key_compare            = __type_identity_t<_Compare>;
+  using reference              = pair<const key_type&, mapped_type&>;
+  using const_reference        = pair<const key_type&, const mapped_type&>;
+  using size_type              = size_t;
+  using difference_type        = ptrdiff_t;
+  using iterator               = __iterator<false>; // see [container.requirements]
+  using const_iterator         = __iterator<true>;  // see [container.requirements]
+  using reverse_iterator       = std::reverse_iterator<iterator>;
+  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+  using key_container_type     = _KeyContainer;
+  using mapped_container_type  = _MappedContainer;
+
+  class value_compare { // orders two elements by key only, via the stored key_compare
+  private:
+    key_compare __comp_;
+    _LIBCPP_HIDE_FROM_ABI value_compare(key_compare __c) : __comp_(__c) {} // private: built by flat_map (friend)
+    friend flat_map;
+
+  public:
+    _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const {
+      return __comp_(__x.first, __y.first);
+    }
+  };
+
+  struct containers {
+    key_container_type keys;
+    mapped_container_type values;
+  };
+
+private:
+  template <class _Allocator>
+  _LIBCPP_HIDE_FROM_ABI static constexpr bool __allocator_ctor_constraint =
+      _And<uses_allocator<key_container_type, _Allocator>, uses_allocator<mapped_container_type, _Allocator>>::value;
+
+  _LIBCPP_HIDE_FROM_ABI static constexpr bool __is_compare_transparent = __is_transparent_v<_Compare, _Compare>;
+
+  template <bool _Const>
+  struct __iterator {
+  private:
+    using __key_iterator    = ranges::iterator_t<const key_container_type>;
+    using __mapped_iterator = ranges::iterator_t<__maybe_const<_Const, mapped_container_type>>;
+    using __reference       = pair<iter_reference_t<__key_iterator>, iter_reference_t<__mapped_iterator>>;
+
+    struct __arrow_proxy {
+      __reference __ref_;
+      _LIBCPP_HIDE_FROM_ABI __reference* operator->() { return std::addressof(__ref_); }
+    };
+
+    __key_iterator __key_iter_;
+    __mapped_iterator __mapped_iter_;
+
+    friend flat_map;
+
+  public:
+    using iterator_concept = random_access_iterator_tag;
+    // `flat_map::iterator` only satisfies the "Cpp17InputIterator" named requirements, because
+    // its `reference` is not a reference type.
+    // However, to avoid surprising runtime behaviour when it is used with the
+    // Cpp17 algorithms or operations, iterator_category is set to random_access_iterator_tag.
+    using iterator_category = random_access_iterator_tag;
+    using value_type        = flat_map::value_type;
+    using difference_type   = flat_map::difference_type;
+
+    _LIBCPP_HIDE_FROM_ABI __iterator() = default;
+
+    _LIBCPP_HIDE_FROM_ABI __iterator(__iterator<!_Const> __i)
+      requires _Const && convertible_to<ranges::iterator_t<key_container_type>, __key_iterator> &&
+                   convertible_to<ranges::iterator_t<mapped_container_type>, __mapped_iterator>
+        : __key_iter_(std::move(__i.__key_iter_)), __mapped_iter_(std::move(__i.__mapped_iter_)) {}
+
+    _LIBCPP_HIDE_FROM_ABI __iterator(__key_iterator __key_iter, __mapped_iterator __mapped_iter)
+        : __key_iter_(std::move(__key_iter)), __mapped_iter_(std::move(__mapped_iter)) {}
+
+    _LIBCPP_HIDE_FROM_ABI __reference operator*() const { return __reference(*__key_iter_, *__mapped_iter_); }
+    _LIBCPP_HIDE_FROM_ABI __arrow_proxy operator->() const { return __arrow_proxy{**this}; }
+
+    _LIBCPP_HIDE_FROM_ABI __iterator& operator++() {
+      ++__key_iter_;
+      ++__mapped_iter_;
+      return *this;
+    }
+
+    _LIBCPP_HIDE_FROM_ABI __iterator operator++(int) {
+      __iterator __tmp(*this);
+      ++*this;
+      return __tmp;
+    }
+
+    _LIBCPP_HIDE_FROM_ABI __iterator& operator--() {
+      --__key_iter_;
+      --__mapped_iter_;
+      return *this;
+    }
+
+    _LIBCPP_HIDE_FROM_ABI __iterator operator--(int) {
+      __iterator __tmp(*this);
+      --*this;
+      return __tmp;
+    }
+
+    _LIBCPP_HIDE_FROM_ABI __iterator& operator+=(difference_type __x) {
+      __key_iter_ += __x;
+      __mapped_iter_ += __x;
+      return *this;
+    }
+
+    _LIBCPP_HIDE_FROM_ABI __iterator& operator-=(difference_type __x) {
+      __key_iter_ -= __x;
+      __mapped_iter_ -= __x;
+      return *this;
+    }
+
+    _LIBCPP_HIDE_FROM_ABI __reference operator[](difference_type __n) const { return *(*this + __n); }
+
+    _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const __iterator& __x, const __iterator& __y) {
+      return __x.__key_iter_ == __y.__key_iter_;
+    }
+
+    _LIBCPP_HIDE_FROM_ABI friend bool operator<(const __iterator& __x, const __iterator& __y) {
+      return __x.__key_iter_ < __y.__key_iter_;
+    }
+
+    _LIBCPP_HIDE_FROM_ABI friend bool operator>(const __iterator& __x, const __iterator& __y) { return __y < __x; }
+
+    _LIBCPP_HIDE_FROM_ABI friend bool operator<=(const __iterator& __x, const __iterator& __y) { return !(__y < __x); }
+
+    _LIBCPP_HIDE_FROM_ABI friend bool operator>=(const __iterator& __x, const __iterator& __y) { return !(__x < __y); }
+
+    _LIBCPP_HIDE_FROM_ABI friend auto operator<=>(const __iterator& __x, const __iterator& __y)
+      requires three_way_comparable<__key_iterator>
+    {
+      return __x.__key_iter_ <=> __y.__key_iter_;
+    }
+
+    _LIBCPP_HIDE_FROM_ABI friend __iterator operator+(const __iterator& __i, difference_type __n) {
+      auto __tmp = __i;
+      __tmp += __n;
+      return __tmp;
+    }
+
+    _LIBCPP_HIDE_FROM_ABI friend __iterator operator+(difference_type __n, const __iterator& __i) { return __i + __n; }
+
+    _LIBCPP_HIDE_FROM_ABI friend __iterator operator-(const __iterator& __i, difference_type __n) {
+      auto __tmp = __i;
+      __tmp -= __n;
+      return __tmp;
+    }
+
+    _LIBCPP_HIDE_FROM_ABI friend difference_type operator-(const __iterator& __x, const __iterator& __y) {
+      return difference_type(__x.__key_iter_ - __y.__key_iter_);
+    }
+  };
+
+public:
+  // [flat.map.cons], construct/copy/destroy
+  _LIBCPP_HIDE_FROM_ABI flat_map() noexcept(
+      is_nothrow_default_constructible_v<_KeyContainer> && is_nothrow_default_constructible_v<_MappedContainer> &&
+      is_nothrow_default_constructible_v<_Compare>)
+      : __containers_(), __compare_() {}
+
+  _LIBCPP_HIDE_FROM_ABI flat_map(const flat_map&) = default;
+
+  _LIBCPP_HIDE_FROM_ABI flat_map(flat_map&& __other) noexcept(
+      is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_MappedContainer> &&
+      is_nothrow_move_constructible_v<_Compare>)
+#  if _LIBCPP_HAS_EXCEPTIONS
+      try
+#  endif // _LIBCPP_HAS_EXCEPTIONS
+      : __containers_(std::move(__other.__containers_)), __compare_(std::move(__other.__compare_)) {
+    __other.clear();
+#  if _LIBCPP_HAS_EXCEPTIONS
+  } catch (...) {
+    __other.clear();
+    // gcc does not like the `throw` keyword in a conditional noexcept function
+    if constexpr (!(is_nothrow_move_constructible_v<_KeyContainer> &&
+                    is_nothrow_move_constructible_v<_MappedContainer> && is_nothrow_move_constructible_v<_Compare>)) {
+      throw;
+    }
+#  endif // _LIBCPP_HAS_EXCEPTIONS
+  }
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_map(const flat_map& __other, const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_tag{},
+                 __alloc,
+                 __other.__containers_.keys,
+                 __other.__containers_.values,
+                 __other.__compare_) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_map(flat_map&& __other, const _Allocator& __alloc)
+#  if _LIBCPP_HAS_EXCEPTIONS
+      try
+#  endif // _LIBCPP_HAS_EXCEPTIONS
+      : flat_map(__ctor_uses_allocator_tag{},
+                 __alloc,
+                 std::move(__other.__containers_.keys),
+                 std::move(__other.__containers_.values),
+                 std::move(__other.__compare_)) {
+    __other.clear();
+#  if _LIBCPP_HAS_EXCEPTIONS
+  } catch (...) {
+    __other.clear();
+    throw;
+#  endif // _LIBCPP_HAS_EXCEPTIONS
+  }
+
+  _LIBCPP_HIDE_FROM_ABI flat_map(
+      key_container_type __key_cont, mapped_container_type __mapped_cont, const key_compare& __comp = key_compare())
+      : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
+                                     "flat_map keys and mapped containers have different size");
+    __sort_and_unique();
+  }
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(const key_container_type& __key_cont, const mapped_container_type& __mapped_cont, const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
+                                     "flat_map keys and mapped containers have different size");
+    __sort_and_unique();
+  }
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(const key_container_type& __key_cont,
+           const mapped_container_type& __mapped_cont,
+           const key_compare& __comp,
+           const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
+                                     "flat_map keys and mapped containers have different size");
+    __sort_and_unique();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(sorted_unique_t,
+           key_container_type __key_cont,
+           mapped_container_type __mapped_cont,
+           const key_compare& __comp = key_compare())
+      : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
+                                     "flat_map keys and mapped containers have different size");
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(
+        __is_sorted_and_unique(__containers_.keys), "Either the key container is not sorted or it contains duplicates");
+  }
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(sorted_unique_t,
+           const key_container_type& __key_cont,
+           const mapped_container_type& __mapped_cont,
+           const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
+                                     "flat_map keys and mapped containers have different size");
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(
+        __is_sorted_and_unique(__containers_.keys), "Either the key container is not sorted or it contains duplicates");
+  }
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(sorted_unique_t,
+           const key_container_type& __key_cont,
+           const mapped_container_type& __mapped_cont,
+           const key_compare& __comp,
+           const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(),
+                                     "flat_map keys and mapped containers have different size");
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(
+        __is_sorted_and_unique(__containers_.keys), "Either the key container is not sorted or it contains duplicates");
+  }
+
+  _LIBCPP_HIDE_FROM_ABI explicit flat_map(const key_compare& __comp) : __containers_(), __compare_(__comp) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_map(const key_compare& __comp, const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI explicit flat_map(const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) {}
+
+  template <class _InputIterator>
+    requires __has_input_iterator_category<_InputIterator>::value
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(_InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare())
+      : __containers_(), __compare_(__comp) {
+    insert(__first, __last);
+  }
+
+  template <class _InputIterator, class _Allocator>
+    requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>)
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(_InputIterator __first, _InputIterator __last, const key_compare& __comp, const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {
+    insert(__first, __last);
+  }
+
+  template <class _InputIterator, class _Allocator>
+    requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>)
+  _LIBCPP_HIDE_FROM_ABI flat_map(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) {
+    insert(__first, __last);
+  }
+
+  template <_ContainerCompatibleRange<value_type> _Range>
+  _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t __fr, _Range&& __rg)
+      : flat_map(__fr, std::forward<_Range>(__rg), key_compare()) {}
+
+  template <_ContainerCompatibleRange<value_type> _Range, class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t, _Range&& __rg, const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) {
+    insert_range(std::forward<_Range>(__rg));
+  }
+
+  template <_ContainerCompatibleRange<value_type> _Range>
+  _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t, _Range&& __rg, const key_compare& __comp) : flat_map(__comp) {
+    insert_range(std::forward<_Range>(__rg));
+  }
+
+  template <_ContainerCompatibleRange<value_type> _Range, class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {
+    insert_range(std::forward<_Range>(__rg));
+  }
+
+  template <class _InputIterator>
+    requires __has_input_iterator_category<_InputIterator>::value
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(sorted_unique_t, _InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare())
+      : __containers_(), __compare_(__comp) {
+    insert(sorted_unique, __first, __last);
+  }
+  template <class _InputIterator, class _Allocator>
+    requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>)
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(sorted_unique_t,
+           _InputIterator __first,
+           _InputIterator __last,
+           const key_compare& __comp,
+           const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {
+    insert(sorted_unique, __first, __last);
+  }
+
+  template <class _InputIterator, class _Allocator>
+    requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>)
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(sorted_unique_t, _InputIterator __first, _InputIterator __last, const _Allocator& __alloc)
+      : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) {
+    insert(sorted_unique, __first, __last);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI flat_map(initializer_list<value_type> __il, const key_compare& __comp = key_compare())
+      : flat_map(__il.begin(), __il.end(), __comp) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(initializer_list<value_type> __il, const key_compare& __comp, const _Allocator& __alloc)
+      : flat_map(__il.begin(), __il.end(), __comp, __alloc) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_map(initializer_list<value_type> __il, const _Allocator& __alloc)
+      : flat_map(__il.begin(), __il.end(), __alloc) {}
+
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(sorted_unique_t, initializer_list<value_type> __il, const key_compare& __comp = key_compare())
+      : flat_map(sorted_unique, __il.begin(), __il.end(), __comp) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(sorted_unique_t, initializer_list<value_type> __il, const key_compare& __comp, const _Allocator& __alloc)
+      : flat_map(sorted_unique, __il.begin(), __il.end(), __comp, __alloc) {}
+
+  template <class _Allocator>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_map(sorted_unique_t, initializer_list<value_type> __il, const _Allocator& __alloc)
+      : flat_map(sorted_unique, __il.begin(), __il.end(), __alloc) {}
+
+  _LIBCPP_HIDE_FROM_ABI flat_map& operator=(initializer_list<value_type> __il) {
+    clear();
+    insert(__il);
+    return *this;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI flat_map& operator=(const flat_map&) = default;
+
+  _LIBCPP_HIDE_FROM_ABI flat_map& operator=(flat_map&& __other) noexcept(
+      is_nothrow_move_assignable_v<_KeyContainer> && is_nothrow_move_assignable_v<_MappedContainer> &&
+      is_nothrow_move_assignable_v<_Compare>) {
+    // No matter what happens, we always want to clear the other container before returning
+    // since we moved from it
+    auto __clear_other_guard = std::__make_scope_guard([&]() noexcept { __other.clear() /* noexcept */; });
+    {
+      // If an exception is thrown, we have no choice but to clear *this to preserve invariants
+      auto __on_exception = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
+      __containers_       = std::move(__other.__containers_);
+      __compare_          = std::move(__other.__compare_);
+      __on_exception.__complete();
+    }
+    return *this;
+  }
+
+  // iterators
+  _LIBCPP_HIDE_FROM_ABI iterator begin() noexcept {
+    return iterator(__containers_.keys.begin(), __containers_.values.begin());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI const_iterator begin() const noexcept {
+    return const_iterator(__containers_.keys.begin(), __containers_.values.begin());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator end() noexcept {
+    return iterator(__containers_.keys.end(), __containers_.values.end());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI const_iterator end() const noexcept {
+    return const_iterator(__containers_.keys.end(), __containers_.values.end());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() noexcept { return reverse_iterator(end()); }
+  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); }
+  _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() noexcept { return reverse_iterator(begin()); }
+  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); }
+
+  _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const noexcept { return begin(); }
+  _LIBCPP_HIDE_FROM_ABI const_iterator cend() const noexcept { return end(); }
+  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); }
+  _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); }
+
+  // [flat.map.capacity], capacity
+  [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __containers_.keys.empty(); }
+
+  _LIBCPP_HIDE_FROM_ABI size_type size() const noexcept { return __containers_.keys.size(); }
+
+  _LIBCPP_HIDE_FROM_ABI size_type max_size() const noexcept {
+    return std::min<size_type>(__containers_.keys.max_size(), __containers_.values.max_size());
+  }
+
+  // [flat.map.access], element access
+  _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](const key_type& __x)
+    requires is_constructible_v<mapped_type>
+  {
+    return try_emplace(__x).first->second;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](key_type&& __x)
+    requires is_constructible_v<mapped_type>
+  {
+    return try_emplace(std::move(__x)).first->second;
+  }
+
+  template <class _Kp>
+    requires(__is_compare_transparent && is_constructible_v<key_type, _Kp> && is_constructible_v<mapped_type> &&
+             !is_convertible_v<_Kp &&, const_iterator> && !is_convertible_v<_Kp &&, iterator>)
+  _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](_Kp&& __x) {
+    return try_emplace(std::forward<_Kp>(__x)).first->second;
+  }
+
+  // Looks up __x with find(); calls std::__throw_out_of_range if it is absent, else returns the mapped value.
+  _LIBCPP_HIDE_FROM_ABI mapped_type& at(const key_type& __x) {
+    const auto __pos = find(__x);
+    if (__pos == end())
+      std::__throw_out_of_range("flat_map::at(const key_type&): Key does not exist");
+    return __pos->second;
+  }
+
+  // Const lookup of __x with find(); calls std::__throw_out_of_range if absent, else returns the mapped value.
+  _LIBCPP_HIDE_FROM_ABI const mapped_type& at(const key_type& __x) const {
+    const auto __pos = find(__x);
+    if (__pos == end())
+      std::__throw_out_of_range("flat_map::at(const key_type&) const: Key does not exist");
+    return __pos->second;
+  }
+
+  // Overload for any key type _Kp, available only when __is_compare_transparent is true.
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI mapped_type& at(const _Kp& __x) {
+    const auto __pos = find(__x);
+    if (__pos == end())
+      std::__throw_out_of_range("flat_map::at(const K&): Key does not exist");
+    return __pos->second;
+  }
+
+  // Const overload for any key type _Kp, available only when __is_compare_transparent is true.
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI const mapped_type& at(const _Kp& __x) const {
+    const auto __pos = find(__x);
+    if (__pos == end())
+      std::__throw_out_of_range("flat_map::at(const K&) const: Key does not exist");
+    return __pos->second;
+  }
+
+  // [flat.map.modifiers], modifiers
+  template <class... _Args>
+    requires is_constructible_v<pair<key_type, mapped_type>, _Args...>
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> emplace(_Args&&... __args) {
+    std::pair<key_type, mapped_type> __pair(std::forward<_Args>(__args)...);
+    return __try_emplace(std::move(__pair.first), std::move(__pair.second));
+  }
+
+  template <class... _Args>
+    requires is_constructible_v<pair<key_type, mapped_type>, _Args...>
+  _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __hint, _Args&&... __args) {
+    std::pair<key_type, mapped_type> __pair(std::forward<_Args>(__args)...);
+    return __try_emplace_hint(__hint, std::move(__pair.first), std::move(__pair.second)).first;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert(const value_type& __x) { return emplace(__x); }
+
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert(value_type&& __x) { return emplace(std::move(__x)); }
+
+  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, const value_type& __x) {
+    return emplace_hint(__hint, __x);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, value_type&& __x) {
+    return emplace_hint(__hint, std::move(__x));
+  }
+
+  template <class _Pp>
+    requires is_constructible_v<pair<key_type, mapped_type>, _Pp>
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert(_Pp&& __x) {
+    return emplace(std::forward<_Pp>(__x));
+  }
+
+  template <class _Pp>
+    requires is_constructible_v<pair<key_type, mapped_type>, _Pp>
+  _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, _Pp&& __x) {
+    return emplace_hint(__hint, std::forward<_Pp>(__x));
+  }
+
+  template <class _InputIterator>
+    requires __has_input_iterator_category<_InputIterator>::value
+  _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) {
+    if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) {
+      __reserve(__last - __first);
+    }
+    __append_sort_merge_unique</*WasSorted = */ false>(std::move(__first), std::move(__last));
+  }
+
+  template <class _InputIterator>
+    requires __has_input_iterator_category<_InputIterator>::value
+  _LIBCPP_HIDE_FROM_ABI void insert(sorted_unique_t, _InputIterator __first, _InputIterator __last) {
+    if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) {
+      __reserve(__last - __first);
+    }
+    // sorted_unique_t overload: the range is forwarded with WasSorted = true (the untagged insert passes false).
+    __append_sort_merge_unique</*WasSorted = */ true>(std::move(__first), std::move(__last));
+  }
+
+  template <_ContainerCompatibleRange<value_type> _Range>
+  _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) {
+    if constexpr (ranges::sized_range<_Range>) {
+      __reserve(ranges::size(__range));
+    }
+
+    __append_sort_merge_unique</*WasSorted = */ false>(ranges::begin(__range), ranges::end(__range));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void insert(initializer_list<value_type> __il) { insert(__il.begin(), __il.end()); }
+
+  _LIBCPP_HIDE_FROM_ABI void insert(sorted_unique_t, initializer_list<value_type> __il) {
+    insert(sorted_unique, __il.begin(), __il.end());
+  }
+
+  _LIBCPP_HIDE_FROM_ABI containers extract() && {
+    auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; });
+    auto __ret   = std::move(__containers_);
+    return __ret;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void replace(key_container_type&& __key_cont, mapped_container_type&& __mapped_cont) {
+    _LIBCPP_ASSERT_VALID_INPUT_RANGE(
+        __key_cont.size() == __mapped_cont.size(), "flat_map keys and mapped containers have different size");
+    // The containers are adopted as-is (no sort or dedup here); sortedness/uniqueness is only asserted.
+    _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(
+        __is_sorted_and_unique(__key_cont), "Either the key container is not sorted or it contains duplicates");
+    auto __guard         = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
+    __containers_.keys   = std::move(__key_cont);
+    __containers_.values = std::move(__mapped_cont);
+    __guard.__complete();
+  }
+
+  template <class... _Args>
+    requires is_constructible_v<mapped_type, _Args...>
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> try_emplace(const key_type& __key, _Args&&... __args) {
+    return __try_emplace(__key, std::forward<_Args>(__args)...);
+  }
+
+  template <class... _Args>
+    requires is_constructible_v<mapped_type, _Args...>
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> try_emplace(key_type&& __key, _Args&&... __args) {
+    return __try_emplace(std::move(__key), std::forward<_Args>(__args)...);
+  }
+
+  template <class _Kp, class... _Args>
+    requires(__is_compare_transparent && is_constructible_v<key_type, _Kp> &&
+             is_constructible_v<mapped_type, _Args...> && !is_convertible_v<_Kp &&, const_iterator> &&
+             !is_convertible_v<_Kp &&, iterator>)
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> try_emplace(_Kp&& __key, _Args&&... __args) {
+    return __try_emplace(std::forward<_Kp>(__key), std::forward<_Args>(__args)...);
+  }
+
+  template <class... _Args>
+    requires is_constructible_v<mapped_type, _Args...>
+  _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __hint, const key_type& __key, _Args&&... __args) {
+    return __try_emplace_hint(__hint, __key, std::forward<_Args>(__args)...).first;
+  }
+
+  template <class... _Args>
+    requires is_constructible_v<mapped_type, _Args...>
+  _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __hint, key_type&& __key, _Args&&... __args) {
+    return __try_emplace_hint(__hint, std::move(__key), std::forward<_Args>(__args)...).first;
+  }
+
+  template <class _Kp, class... _Args>
+    requires __is_compare_transparent && is_constructible_v<key_type, _Kp> && is_constructible_v<mapped_type, _Args...>
+  _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __hint, _Kp&& __key, _Args&&... __args) {
+    return __try_emplace_hint(__hint, std::forward<_Kp>(__key), std::forward<_Args>(__args)...).first;
+  }
+
+  template <class _Mapped>
+    requires is_assignable_v<mapped_type&, _Mapped> && is_constructible_v<mapped_type, _Mapped>
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(const key_type& __key, _Mapped&& __obj) {
+    return __insert_or_assign(__key, std::forward<_Mapped>(__obj));
+  }
+
+  template <class _Mapped>
+    requires is_assignable_v<mapped_type&, _Mapped> && is_constructible_v<mapped_type, _Mapped>
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(key_type&& __key, _Mapped&& __obj) {
+    return __insert_or_assign(std::move(__key), std::forward<_Mapped>(__obj));
+  }
+
+  template <class _Kp, class _Mapped>
+    requires __is_compare_transparent && is_constructible_v<key_type, _Kp> && is_assignable_v<mapped_type&, _Mapped> &&
+             is_constructible_v<mapped_type, _Mapped>
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> insert_or_assign(_Kp&& __key, _Mapped&& __obj) {
+    return __insert_or_assign(std::forward<_Kp>(__key), std::forward<_Mapped>(__obj));
+  }
+
+  template <class _Mapped>
+    requires is_assignable_v<mapped_type&, _Mapped> && is_constructible_v<mapped_type, _Mapped>
+  _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __hint, const key_type& __key, _Mapped&& __obj) {
+    return __insert_or_assign(__hint, __key, std::forward<_Mapped>(__obj));
+  }
+
+  template <class _Mapped>
+    requires is_assignable_v<mapped_type&, _Mapped> && is_constructible_v<mapped_type, _Mapped>
+  _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __hint, key_type&& __key, _Mapped&& __obj) {
+    return __insert_or_assign(__hint, std::move(__key), std::forward<_Mapped>(__obj));
+  }
+
+  template <class _Kp, class _Mapped>
+    requires __is_compare_transparent && is_constructible_v<key_type, _Kp> && is_assignable_v<mapped_type&, _Mapped> &&
+             is_constructible_v<mapped_type, _Mapped>
+  _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __hint, _Kp&& __key, _Mapped&& __obj) {
+    return __insert_or_assign(__hint, std::forward<_Kp>(__key), std::forward<_Mapped>(__obj));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator erase(iterator __position) {
+    return __erase(__position.__key_iter_, __position.__mapped_iter_);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __position) {
+    return __erase(__position.__key_iter_, __position.__mapped_iter_);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI size_type erase(const key_type& __x) {
+    auto __pos = find(__x);
+    if (__pos == end()) {
+      return 0;
+    }
+    erase(__pos);
+    return 1;
+  }
+
+  template <class _Kp>
+    requires(__is_compare_transparent && !is_convertible_v<_Kp &&, iterator> &&
+             !is_convertible_v<_Kp &&, const_iterator>)
+  _LIBCPP_HIDE_FROM_ABI size_type erase(_Kp&& __x) {
+    auto __matches    = equal_range(__x);
+    auto __num_erased = __matches.second - __matches.first;
+    erase(__matches.first, __matches.second);
+    return __num_erased;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __first, const_iterator __last) {
+    auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
+    auto __key_it     = __containers_.keys.erase(__first.__key_iter_, __last.__key_iter_);
+    auto __mapped_it  = __containers_.values.erase(__first.__mapped_iter_, __last.__mapped_iter_);
+    __on_failure.__complete();
+    return iterator(std::move(__key_it), std::move(__mapped_it));
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void swap(flat_map& __y) noexcept {
+    // warning: The spec has unconditional noexcept, which means that
+    // if any of the following functions throw an exception,
+    // std::terminate will be called.
+    // This is discussed in P2767, which hasn't been voted on yet.
+    ranges::swap(__compare_, __y.__compare_);
+    ranges::swap(__containers_.keys, __y.__containers_.keys);
+    ranges::swap(__containers_.values, __y.__containers_.values);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void clear() noexcept {
+    __containers_.keys.clear();
+    __containers_.values.clear();
+  }
+
+  // observers
+  _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __compare_; }
+  _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return value_compare(__compare_); }
+
+  _LIBCPP_HIDE_FROM_ABI const key_container_type& keys() const noexcept { return __containers_.keys; }
+  _LIBCPP_HIDE_FROM_ABI const mapped_container_type& values() const noexcept { return __containers_.values; }
+
+  // map operations
+  _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __x) { return __find_impl(*this, __x); }
+
+  _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI iterator find(const _Kp& __x) {
+    return __find_impl(*this, __x);
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Kp& __x) const {
+    return __find_impl(*this, __x);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __x) const { return contains(__x) ? 1 : 0; }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI size_type count(const _Kp& __x) const {
+    return contains(__x) ? 1 : 0;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __x) const { return find(__x) != end(); }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI bool contains(const _Kp& __x) const {
+    return find(__x) != end();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __x) { return __lower_bound<iterator>(*this, __x); }
+
+  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __x) const {
+    return __lower_bound<const_iterator>(*this, __x);
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Kp& __x) {
+    return __lower_bound<iterator>(*this, __x);
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Kp& __x) const {
+    return __lower_bound<const_iterator>(*this, __x);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __x) { return __upper_bound<iterator>(*this, __x); }
+
+  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __x) const {
+    return __upper_bound<const_iterator>(*this, __x);
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Kp& __x) {
+    return __upper_bound<iterator>(*this, __x);
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Kp& __x) const {
+    return __upper_bound<const_iterator>(*this, __x);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const key_type& __x) {
+    return __equal_range_impl(*this, __x);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const key_type& __x) const {
+    return __equal_range_impl(*this, __x);
+  }
+
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, iterator> equal_range(const _Kp& __x) {
+    return __equal_range_impl(*this, __x);
+  }
+  template <class _Kp>
+    requires __is_compare_transparent
+  _LIBCPP_HIDE_FROM_ABI pair<const_iterator, const_iterator> equal_range(const _Kp& __x) const {
+    return __equal_range_impl(*this, __x);
+  }
+
+  friend _LIBCPP_HIDE_FROM_ABI bool operator==(const flat_map& __x, const flat_map& __y) {
+    return ranges::equal(__x, __y);
+  }
+
+  friend _LIBCPP_HIDE_FROM_ABI auto operator<=>(const flat_map& __x, const flat_map& __y) {
+    return std::lexicographical_compare_three_way(
+        __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way);
+  }
+
+  friend _LIBCPP_HIDE_FROM_ABI void swap(flat_map& __x, flat_map& __y) noexcept { __x.swap(__y); }
+
+private:
+  struct __ctor_uses_allocator_tag {
+    explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_tag() = default;
+  };
+  struct __ctor_uses_allocator_empty_tag {
+    explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_empty_tag() = default;
+  };
+
+  template <class _Allocator, class _KeyCont, class _MappedCont, class... _CompArg>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI
+  flat_map(__ctor_uses_allocator_tag,
+           const _Allocator& __alloc,
+           _KeyCont&& __key_cont,
+           _MappedCont&& __mapped_cont,
+           _CompArg&&... __comp)
+      : __containers_{.keys = std::make_obj_using_allocator<key_container_type>(
+                          __alloc, std::forward<_KeyCont>(__key_cont)),
+                      .values = std::make_obj_using_allocator<mapped_container_type>(
+                          __alloc, std::forward<_MappedCont>(__mapped_cont))},
+        __compare_(std::forward<_CompArg>(__comp)...) {}
+
+  template <class _Allocator, class... _CompArg>
+    requires __allocator_ctor_constraint<_Allocator>
+  _LIBCPP_HIDE_FROM_ABI flat_map(__ctor_uses_allocator_empty_tag, const _Allocator& __alloc, _CompArg&&... __comp)
+      : __containers_{.keys   = std::make_obj_using_allocator<key_container_type>(__alloc),
+                      .values = std::make_obj_using_allocator<mapped_container_type>(__alloc)},
+        __compare_(std::forward<_CompArg>(__comp)...) {}
+
+  _LIBCPP_HIDE_FROM_ABI bool __is_sorted_and_unique(auto&& __key_container) const {
+    auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) { return !__compare_(__x, __y); };
+    return ranges::adjacent_find(__key_container, __greater_or_equal_to) == ranges::end(__key_container);
+  }
+
+  // This function is only used in constructors, so there is no exception handling in this function.
+  // If the function exits via an exception, no flat_map object will have been constructed; thus, there
+  // is no invariant state to preserve.
+  _LIBCPP_HIDE_FROM_ABI void __sort_and_unique() {
+    auto __zv = ranges::views::zip(__containers_.keys, __containers_.values);
+    // To be consistent with std::map's behaviour, we use stable_sort instead of sort.
+    // As a result, if there are duplicated keys, the first value in the original order will be taken.
+    ranges::stable_sort(__zv, __compare_, [](const auto& __p) -> decltype(auto) { return std::get<0>(__p); });
+    auto __dup_start = ranges::unique(__zv, __key_equiv(__compare_)).begin();
+    auto __dist      = ranges::distance(__zv.begin(), __dup_start);
+    __containers_.keys.erase(__containers_.keys.begin() + __dist, __containers_.keys.end());
+    __containers_.values.erase(__containers_.values.begin() + __dist, __containers_.values.end());
+  }
+
+  template <class _InputIterator, class _Sentinel>
+  _LIBCPP_HIDE_FROM_ABI size_type __append(_InputIterator __first, _Sentinel __last) {
+    size_type __num_of_appended = 0;
+    for (; __first != __last; ++__first) {
+      value_type __kv = *__first;
+      __containers_.keys.insert(__containers_.keys.end(), std::move(__kv.first));
+      __containers_.values.insert(__containers_.values.end(), std::move(__kv.second));
+      ++__num_of_appended;
+    }
+    return __num_of_appended;
+  }
+
+  template <bool _WasSorted, class _InputIterator, class _Sentinel>
+  _LIBCPP_HIDE_FROM_ABI void __append_sort_merge_unique(_InputIterator __first, _Sentinel __last) {
+    auto __on_failure        = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
+    size_t __num_of_appended = __append(std::move(__first), std::move(__last));
+    if (__num_of_appended != 0) {
+      auto __zv                  = ranges::views::zip(__containers_.keys, __containers_.values);
+      auto __append_start_offset = __containers_.keys.size() - __num_of_appended;
+      auto __end                 = __zv.end();
+      auto __compare_key         = [this](const auto& __p1, const auto& __p2) {
+        return __compare_(std::get<0>(__p1), std::get<0>(__p2));
+      };
+      if constexpr (!_WasSorted) {
+        ranges::stable_sort(__zv.begin() + __append_start_offset, __end, __compare_key);
+      } else {
+        _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(
+            __is_sorted_and_unique(__containers_.keys | ranges::views::drop(__append_start_offset)),
+            "Either the key container is not sorted or it contains duplicates");
+      }
+      ranges::inplace_merge(__zv.begin(), __zv.begin() + __append_start_offset, __end, __compare_key);
+
+      auto __dup_start = ranges::unique(__zv, __key_equiv(__compare_)).begin();
+      auto __dist      = ranges::distance(__zv.begin(), __dup_start);
+      __containers_.keys.erase(__containers_.keys.begin() + __dist, __containers_.keys.end());
+      __containers_.values.erase(__containers_.values.begin() + __dist, __containers_.values.end());
+    }
+    __on_failure.__complete();
+  }
+
+  template <class _Self, class _Kp>
+  _LIBCPP_HIDE_FROM_ABI static auto __find_impl(_Self&& __self, const _Kp& __key) {
+    // A hit is the lower bound of __key, provided that it exists and that __key
+    // does not order before it; anything else is reported as end().
+    auto __candidate   = __self.lower_bound(__key);
+    auto __end         = __self.end();
+    const bool __found = __candidate != __end && !__self.__compare_(__key, __candidate->first);
+    return __found ? __candidate : __end;
+  }
+
+  template <class _Self, class _Kp>
+  _LIBCPP_HIDE_FROM_ABI static auto __key_equal_range(_Self&& __self, const _Kp& __key) {
+    // The returned range spans at most one key: it is empty unless the lower bound
+    // exists and __key does not order before it.
+    auto& __keys       = __self.__containers_.keys;
+    auto __first       = ranges::lower_bound(__keys, __key, __self.__compare_);
+    const bool __found = __first != __keys.end() && !__self.__compare_(__key, *__first);
+    return std::make_pair(__first, __found ? std::next(__first) : __first);
+  }
+
+  template <class _Self, class _Kp>
+  _LIBCPP_HIDE_FROM_ABI static auto __equal_range_impl(_Self&& __self, const _Kp& __key) {
+    auto [__key_first, __key_last] = __key_equal_range(__self, __key);
+
+    const auto __make_mapped_iter = [&](const auto& __key_iter) {
+      return __self.__containers_.values.begin() +
+             static_cast<ranges::range_difference_t<mapped_container_type>>(
+                 ranges::distance(__self.__containers_.keys.begin(), __key_iter));
+    };
+
+    using __iterator_type = ranges::iterator_t<decltype(__self)>;
+    return std::make_pair(__iterator_type(__key_first, __make_mapped_iter(__key_first)),
+                          __iterator_type(__key_last, __make_mapped_iter(__key_last)));
+  }
+
+  template <class _Res, class _Self, class _Kp>
+  _LIBCPP_HIDE_FROM_ABI static _Res __lower_bound(_Self&& __self, _Kp& __x) {
+    return __binary_search<_Res>(__self, ranges::lower_bound, __x);
+  }
+
+  template <class _Res, class _Self, class _Kp>
+  _LIBCPP_HIDE_FROM_ABI static _Res __upper_bound(_Self&& __self, _Kp& __x) {
+    return __binary_search<_Res>(__self, ranges::upper_bound, __x);
+  }
+
+  template <class _Res, class _Self, class _Fn, class _Kp>
+  _LIBCPP_HIDE_FROM_ABI static _Res __binary_search(_Self&& __self, _Fn __search_fn, _Kp& __x) {
+    using __mapped_difference_type = ranges::range_difference_t<mapped_container_type>;
+
+    // Search the keys only, then step the same number of positions into the values.
+    auto __key_iter    = __search_fn(__self.__containers_.keys, __x, __self.__compare_);
+    auto __offset      = ranges::distance(__self.__containers_.keys.begin(), __key_iter);
+    auto __mapped_iter = __self.__containers_.values.begin() + static_cast<__mapped_difference_type>(__offset);
+    return _Res(std::move(__key_iter), std::move(__mapped_iter));
+  }
+
+  template <class _KeyArg, class... _MArgs>
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __try_emplace(_KeyArg&& __key, _MArgs&&... __mapped_args) {
+    // Locate the insertion point among the keys; the mapped iterator sits at the same offset. The offset is
+    // cast to the mapped container's difference type, as __binary_search and __equal_range_impl already do.
+    auto __key_it    = ranges::lower_bound(__containers_.keys, __key, __compare_);
+    auto __mapped_it = __containers_.values.begin() + static_cast<ranges::range_difference_t<mapped_container_type>>(
+                           ranges::distance(__containers_.keys.begin(), __key_it));
+    if (__key_it == __containers_.keys.end() || __compare_(__key, *__key_it)) {
+      // No equivalent key is present: emplace the key and the mapped value at that position.
+      return pair<iterator, bool>(
+          __try_emplace_exact_hint(
+              std::move(__key_it), std::move(__mapped_it), std::forward<_KeyArg>(__key),
+              std::forward<_MArgs>(__mapped_args)...),
+          true);
+    }
+    return pair<iterator, bool>(iterator(std::move(__key_it), std::move(__mapped_it)), false);
+  }
+
+  template <class _Kp>
+  _LIBCPP_HIDE_FROM_ABI bool __is_hint_correct(const_iterator __hint, _Kp&& __key) {
+    // A hint is correct when __key can be placed right before it without breaking the ordering:
+    //  - the element preceding the hint (if any) must compare less than __key, and
+    //  - the element at the hint (if any) must not compare less than __key.
+    // The second comparison is only evaluated when the first condition holds.
+
+    const bool __prev_is_less = __hint == cbegin() || __compare_((__hint - 1)->first, __key);
+    return __prev_is_less && (__hint == cend() || !__compare_(__hint->first, __key));
+  }
+
+  template <class _Kp, class... _Args>
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __try_emplace_hint(const_iterator __hint, _Kp&& __key, _Args&&... __args) {
+    if (__is_hint_correct(__hint, __key)) {
+      if (__hint == cend() || __compare_(__key, __hint->first)) {
+        return {
+            __try_emplace_exact_hint(
+                __hint.__key_iter_, __hint.__mapped_iter_, std::forward<_Kp>(__key), std::forward<_Args>(__args)...),
+            true};
+      } else {
+        // key equals
+        auto __dist = __hint - cbegin();
+        return {iterator(__containers_.keys.begin() + __dist, __containers_.values.begin() + __dist), false};
+      }
+    } else {
+      return __try_emplace(std::forward<_Kp>(__key), std::forward<_Args>(__args)...);
+    }
+  }
+
+  template <class _IterK, class _IterM, class _KeyArg, class... _MArgs>
+  _LIBCPP_HIDE_FROM_ABI iterator
+  __try_emplace_exact_hint(_IterK&& __it_key, _IterM&& __it_mapped, _KeyArg&& __key, _MArgs&&... __mapped_args) {
+    auto __on_key_failed = std::__make_exception_guard([&]() noexcept {
+      if constexpr (__container_traits<_KeyContainer>::__emplacement_has_strong_exception_safety_guarantee) {
+        // Nothing to roll back!
+      } else {
+        // we need to clear both because we don't know the state of our keys anymore
+        clear() /* noexcept */;
+      }
+    });
+    auto __key_it        = __containers_.keys.emplace(__it_key, std::forward<_KeyArg>(__key));
+    __on_key_failed.__complete();
+
+    auto __on_value_failed = std::__make_exception_guard([&]() noexcept {
+      if constexpr (!__container_traits<_MappedContainer>::__emplacement_has_strong_exception_safety_guarantee) {
+        // we need to clear both because we don't know the state of our values anymore
+        clear() /* noexcept */;
+      } else {
+        // In this case, we know the values are just like before we attempted emplacement,
+        // and we also know that the keys have been emplaced successfully. Just roll back the keys.
+#  if _LIBCPP_HAS_EXCEPTIONS
+        try {
+#  endif // _LIBCPP_HAS_EXCEPTIONS
+          __containers_.keys.erase(__key_it);
+#  if _LIBCPP_HAS_EXCEPTIONS
+        } catch (...) {
+          // Now things are funky for real. We're failing to rollback the keys.
+          // Just give up and clear the whole thing.
+          //
+          // Also, swallow the exception that happened during the rollback and let the
+          // original value-emplacement exception propagate normally.
+          clear() /* noexcept */;
+        }
+#  endif // _LIBCPP_HAS_EXCEPTIONS
+      }
+    });
+    auto __mapped_it = __containers_.values.emplace(__it_mapped, std::forward<_MArgs>(__mapped_args)...);
+    __on_value_failed.__complete();
+
+    return iterator(std::move(__key_it), std::move(__mapped_it));
+  }
+
+  template <class _Kp, class _Mapped>
+  _LIBCPP_HIDE_FROM_ABI pair<iterator, bool> __insert_or_assign(_Kp&& __key, _Mapped&& __mapped) {
+    auto __result = try_emplace(std::forward<_Kp>(__key), std::forward<_Mapped>(__mapped));
+    if (auto& [__pos, __inserted] = __result; !__inserted) {
+      __pos->second = std::forward<_Mapped>(__mapped);
+    }
+    return __result;
+  }
+
+  template <class _Kp, class _Mapped>
+  _LIBCPP_HIDE_FROM_ABI iterator __insert_or_assign(const_iterator __hint, _Kp&& __key, _Mapped&& __mapped) {
+    auto __result = __try_emplace_hint(__hint, std::forward<_Kp>(__key), std::forward<_Mapped>(__mapped));
+    if (auto& [__pos, __inserted] = __result; !__inserted) {
+      __pos->second = std::forward<_Mapped>(__mapped);
+    }
+    return __result.first;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __size) {
+    const auto __reserve_if_supported = [__size](auto& __cont) {
+      if constexpr (requires { __cont.reserve(__size); }) {
+        __cont.reserve(__size);
+      }
+    };
+    __reserve_if_supported(__containers_.keys);
+    __reserve_if_supported(__containers_.values);
+  }
+
+  template <class _KIter, class _MIter>
+  _LIBCPP_HIDE_FROM_ABI iterator __erase(_KIter __key_iter_to_remove, _MIter __mapped_iter_to_remove) {
+    auto __on_failure  = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; });
+    auto __key_iter    = __containers_.keys.erase(__key_iter_to_remove);
+    auto __mapped_iter = __containers_.values.erase(__mapped_iter_to_remove);
+    __on_failure.__complete();
+    return iterator(std::move(__key_iter), std::move(__mapped_iter));
+  }
+
+  template <class _Key2, class _Tp2, class _Compare2, class _KeyContainer2, class _MappedContainer2, class _Predicate>
+  friend typename flat_map<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>::size_type
+  erase_if(flat_map<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>&, _Predicate);
+
+  containers __containers_;
+  [[no_unique_address]] key_compare __compare_;
+
+  struct __key_equiv {
+    _LIBCPP_HIDE_FROM_ABI __key_equiv(key_compare __c) : __comp_(__c) {}
+    _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const {
+      return !__comp_(std::get<0>(__x), std::get<0>(__y)) && !__comp_(std::get<0>(__y), std::get<0>(__x));
+    }
+    key_compare __comp_;
+  };
+};
+
+template <class _KeyContainer, class _MappedContainer, class _Compare = less<typename _KeyContainer::value_type>>
+  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+           !__is_allocator<_MappedContainer>::value &&
+           is_invocable_v<const _Compare&,
+                          const typename _KeyContainer::value_type&,
+                          const typename _KeyContainer::value_type&>)
+flat_map(_KeyContainer, _MappedContainer, _Compare = _Compare())
+    -> flat_map<typename _KeyContainer::value_type,
+                typename _MappedContainer::value_type,
+                _Compare,
+                _KeyContainer,
+                _MappedContainer>;
+
+template <class _KeyContainer, class _MappedContainer, class _Allocator>
+  requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> &&
+           !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value)
+flat_map(_KeyContainer, _MappedContainer, _Allocator)
+    -> flat_map<typename _KeyContainer::value_type,
+                typename _MappedContainer::value_type,
+                less<typename _KeyContainer::value_type>,
+                _KeyContainer,
+                _MappedContainer>;
+
+template <class _KeyContainer, class _MappedContainer, class _Compare, class _Allocator>
+  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+           !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> &&
+           uses_allocator_v<_MappedContainer, _Allocator> &&
+           is_invocable_v<const _Compare&,
+                          const typename _KeyContainer::value_type&,
+                          const typename _KeyContainer::value_type&>)
+flat_map(_KeyContainer, _MappedContainer, _Compare, _Allocator)
+    -> flat_map<typename _KeyContainer::value_type,
+                typename _MappedContainer::value_type,
+                _Compare,
+                _KeyContainer,
+                _MappedContainer>;
+
+template <class _KeyContainer, class _MappedContainer, class _Compare = less<typename _KeyContainer::value_type>>
+  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+           !__is_allocator<_MappedContainer>::value &&
+           is_invocable_v<const _Compare&,
+                          const typename _KeyContainer::value_type&,
+                          const typename _KeyContainer::value_type&>)
+flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Compare = _Compare())
+    -> flat_map<typename _KeyContainer::value_type,
+                typename _MappedContainer::value_type,
+                _Compare,
+                _KeyContainer,
+                _MappedContainer>;
+
+template <class _KeyContainer, class _MappedContainer, class _Allocator>
+  requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> &&
+           !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value)
+flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Allocator)
+    -> flat_map<typename _KeyContainer::value_type,
+                typename _MappedContainer::value_type,
+                less<typename _KeyContainer::value_type>,
+                _KeyContainer,
+                _MappedContainer>;
+
+template <class _KeyContainer, class _MappedContainer, class _Compare, class _Allocator>
+  requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value &&
+           !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> &&
+           uses_allocator_v<_MappedContainer, _Allocator> &&
+           is_invocable_v<const _Compare&,
+                          const typename _KeyContainer::value_type&,
+                          const typename _KeyContainer::value_type&>)
+flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Compare, _Allocator)
+    -> flat_map<typename _KeyContainer::value_type,
+                typename _MappedContainer::value_type,
+                _Compare,
+                _KeyContainer,
+                _MappedContainer>;
+
+template <class _InputIterator, class _Compare = less<__iter_key_type<_InputIterator>>>
+  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value)
+flat_map(_InputIterator, _InputIterator, _Compare = _Compare())
+    -> flat_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>;
+
+template <class _InputIterator, class _Compare = less<__iter_key_type<_InputIterator>>>
+  requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value)
+flat_map(sorted_unique_t, _InputIterator, _InputIterator, _Compare = _Compare())
+    -> flat_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>;
+
+template <ranges::input_range _Range,
+          class _Compare   = less<__range_key_type<_Range>>,
+          class _Allocator = allocator<byte>,
+          class            = __enable_if_t<!__is_allocator<_Compare>::value && __is_allocator<_Allocator>::value>>
+flat_map(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator())
+    -> flat_map<
+        __range_key_type<_Range>,
+        __range_mapped_type<_Range>,
+        _Compare,
+        vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>,
+        vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>;
+
+template <ranges::input_range _Range, class _Allocator, class = __enable_if_t<__is_allocator<_Allocator>::value>>
+flat_map(from_range_t, _Range&&, _Allocator)
+    -> flat_map<
+        __range_key_type<_Range>,
+        __range_mapped_type<_Range>,
+        less<__range_key_type<_Range>>,
+        vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>,
+        vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>;
+
+template <class _Key, class _Tp, class _Compare = less<_Key>>
+  requires(!__is_allocator<_Compare>::value)
+flat_map(initializer_list<pair<_Key, _Tp>>, _Compare = _Compare()) -> flat_map<_Key, _Tp, _Compare>;
+
+template <class _Key, class _Tp, class _Compare = less<_Key>>
+  requires(!__is_allocator<_Compare>::value)
+flat_map(sorted_unique_t, initializer_list<pair<_Key, _Tp>>, _Compare = _Compare()) -> flat_map<_Key, _Tp, _Compare>;
+
+template <class _Key, class _Tp, class _Compare, class _KeyContainer, class _MappedContainer, class _Allocator>
+struct uses_allocator<flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>, _Allocator>
+    : bool_constant<uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator>> {};
+
+template <class _Key, class _Tp, class _Compare, class _KeyContainer, class _MappedContainer, class _Predicate>
+_LIBCPP_HIDE_FROM_ABI typename flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::size_type
+erase_if(flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>& __flat_map, _Predicate __pred) {
+  auto __zv     = ranges::views::zip(__flat_map.__containers_.keys, __flat_map.__containers_.values);
+  auto __first  = __zv.begin();
+  auto __last   = __zv.end();
+  auto __guard  = std::__make_exception_guard([&]() noexcept { __flat_map.clear() /* noexcept */; });
+  auto __it     = std::remove_if(__first, __last, [&](auto&& __zipped) -> bool {
+    using _Ref = typename flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::const_reference;
+    return __pred(_Ref(std::get<0>(__zipped), std::get<1>(__zipped)));
+  });
+  auto __res    = __last - __it;
+  auto __offset = __it - __first;
+  // remove_if moved the kept elements to the front; cut both containers at the same offset.
+  const auto __erase_container = [&](auto& __cont) { __cont.erase(__cont.begin() + __offset, __cont.end()); };
+
+  __erase_container(__flat_map.__containers_.keys);
+  __erase_container(__flat_map.__containers_.values);
+  // Everything succeeded: disarm the guard so the map is not cleared.
+  __guard.__complete();
+  return __res;
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 23
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FLAT_MAP_FLAT_MAP_H
diff --git a/libcxx/include/__flat_map/sorted_unique.h b/libcxx/include/__flat_map/sorted_unique.h
new file mode 100644
index 000000000000000..0189a5ff1d56843
--- /dev/null
+++ b/libcxx/include/__flat_map/sorted_unique.h
@@ -0,0 +1,31 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef _LIBCPP___FLAT_MAP_SORTED_UNIQUE_H
+#define _LIBCPP___FLAT_MAP_SORTED_UNIQUE_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#if _LIBCPP_STD_VER >= 23
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+struct sorted_unique_t {
+  explicit sorted_unique_t() = default;
+};
+inline constexpr sorted_unique_t sorted_unique{};
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 23
+
+#endif // _LIBCPP___FLAT_MAP_SORTED_UNIQUE_H
diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h
index 6022bd679ed3e3c..67d9da289aead3f 100644
--- a/libcxx/include/__functional/operations.h
+++ b/libcxx/include/__functional/operations.h
@@ -362,6 +362,9 @@ struct _LIBCPP_TEMPLATE_VIS less : __binary_function<_Tp, _Tp, bool> {
 };
 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less);
 
+template <class _Tp>
+inline const bool __desugars_to_v<__less_tag, less<_Tp>, _Tp, _Tp> = true;
+
 template <class _Tp>
 inline const bool __desugars_to_v<__totally_ordered_less_tag, less<_Tp>, _Tp, _Tp> = is_integral<_Tp>::value;
 
@@ -377,6 +380,9 @@ struct _LIBCPP_TEMPLATE_VIS less<void> {
   typedef void is_transparent;
 };
 
+template <class _Tp, class _Up>
+inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Up> = true;
+
 template <class _Tp>
 inline const bool __desugars_to_v<__totally_ordered_less_tag, less<>, _Tp, _Tp> = is_integral<_Tp>::value;
 #endif
@@ -446,6 +452,9 @@ struct _LIBCPP_TEMPLATE_VIS greater : __binary_function<_Tp, _Tp, bool> {
 };
 _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(greater);
 
+template <class _Tp>
+inline const bool __desugars_to_v<__greater_tag, greater<_Tp>, _Tp, _Tp> = true;
+
 #if _LIBCPP_STD_VER >= 14
 template <>
 struct _LIBCPP_TEMPLATE_VIS greater<void> {
@@ -457,6 +466,9 @@ struct _LIBCPP_TEMPLATE_VIS greater<void> {
   }
   typedef void is_transparent;
 };
+
+template <class _Tp, class _Up>
+inline const bool __desugars_to_v<__greater_tag, greater<>, _Tp, _Up> = true;
 #endif
 
 // Logical operations
diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h
index f023d765a6c8ab2..df95843e7c9af61 100644
--- a/libcxx/include/__functional/ranges_operations.h
+++ b/libcxx/include/__functional/ranges_operations.h
@@ -102,6 +102,12 @@ inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = tru
 template <class _Tp, class _Up>
 inline const bool __desugars_to_v<__totally_ordered_less_tag, ranges::less, _Tp, _Up> = true;
 
+template <class _Tp, class _Up>
+inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true;
+
+template <class _Tp, class _Up>
+inline const bool __desugars_to_v<__greater_tag, ranges::greater, _Tp, _Up> = true;
+
 #endif // _LIBCPP_STD_VER >= 20
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h
index 8c000c558c52793..b6c80255b4d1991 100644
--- a/libcxx/include/__locale_dir/locale_base_api.h
+++ b/libcxx/include/__locale_dir/locale_base_api.h
@@ -15,18 +15,16 @@
 #  include <__locale_dir/locale_base_api/ibm.h>
 #elif defined(__ANDROID__)
 #  include <__locale_dir/locale_base_api/android.h>
-#elif defined(__sun__)
-#  include <__locale_dir/locale_base_api/solaris.h>
-#elif defined(_NEWLIB_VERSION)
-#  include <__locale_dir/locale_base_api/newlib.h>
 #elif defined(__OpenBSD__)
 #  include <__locale_dir/locale_base_api/openbsd.h>
 #elif defined(__Fuchsia__)
 #  include <__locale_dir/locale_base_api/fuchsia.h>
 #elif defined(__wasi__) || defined(_LIBCPP_HAS_MUSL_LIBC)
 #  include <__locale_dir/locale_base_api/musl.h>
-#elif defined(__APPLE__) || defined(__FreeBSD__)
-#  include <xlocale.h>
+#elif defined(__APPLE__)
+#  include <__locale_dir/locale_base_api/apple.h>
+#elif defined(__FreeBSD__)
+#  include <__locale_dir/locale_base_api/freebsd.h>
 #endif
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/libcxx/include/__locale_dir/locale_base_api/android.h b/libcxx/include/__locale_dir/locale_base_api/android.h
index 9965d8bbf6a2ecc..08ef5407dedf4e0 100644
--- a/libcxx/include/__locale_dir/locale_base_api/android.h
+++ b/libcxx/include/__locale_dir/locale_base_api/android.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H
-#define _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H
+#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_ANDROID_H
+#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_ANDROID_H
 
 #include <stdlib.h>
 
@@ -47,4 +47,4 @@ inline _LIBCPP_HIDE_FROM_ABI double strtod_l(const char* __nptr, char** __endptr
 #  endif // __NDK_MAJOR__ <= 16
 #endif   // __has_include(<android/ndk-version.h>)
 
-#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H
+#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_ANDROID_H
diff --git a/libcxx/include/__locale_dir/locale_base_api/newlib.h b/libcxx/include/__locale_dir/locale_base_api/apple.h
similarity index 62%
rename from libcxx/include/__locale_dir/locale_base_api/newlib.h
rename to libcxx/include/__locale_dir/locale_base_api/apple.h
index a8c1cff16e6d800..ec5986c3a19f105 100644
--- a/libcxx/include/__locale_dir/locale_base_api/newlib.h
+++ b/libcxx/include/__locale_dir/locale_base_api/apple.h
@@ -1,4 +1,5 @@
-//===----------------------------------------------------------------------===//
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H
-#define _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H
+#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_APPLE_H
+#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_APPLE_H
+
+#include <xlocale.h>
 
-#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H
+#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_APPLE_H
diff --git a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h
index 1f9607209842cad..e88eb4fa41d7af9 100644
--- a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h
+++ b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h
@@ -11,8 +11,8 @@
 // we will define the mapping from an internal macro to the real BSD symbol.
 //===----------------------------------------------------------------------===//
 
-#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H
-#define _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H
+#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H
+#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -33,4 +33,4 @@
 #define __libcpp_asprintf_l(...) asprintf_l(__VA_ARGS__)
 #define __libcpp_sscanf_l(...) sscanf_l(__VA_ARGS__)
 
-#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H
+#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H
diff --git a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h
index 76b94287cd6cc88..ae2db6ae70bebcb 100644
--- a/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h
+++ b/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h
@@ -10,10 +10,10 @@
 // of those functions for non-BSD platforms.
 //===----------------------------------------------------------------------===//
 
-#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H
-#define _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H
+#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H
+#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H
 
-#include <__locale_dir/locale_base_api/locale_guard.h>
+#include <__locale_dir/locale_guard.h>
 #include <cstdio>
 #include <stdarg.h>
 #include <stdlib.h>
@@ -29,64 +29,64 @@
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 inline _LIBCPP_HIDE_FROM_ABI decltype(MB_CUR_MAX) __libcpp_mb_cur_max_l(locale_t __l) {
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   return MB_CUR_MAX;
 }
 
 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
 inline _LIBCPP_HIDE_FROM_ABI wint_t __libcpp_btowc_l(int __c, locale_t __l) {
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   return btowc(__c);
 }
 
 inline _LIBCPP_HIDE_FROM_ABI int __libcpp_wctob_l(wint_t __c, locale_t __l) {
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   return wctob(__c);
 }
 
 inline _LIBCPP_HIDE_FROM_ABI size_t
 __libcpp_wcsnrtombs_l(char* __dest, const wchar_t** __src, size_t __nwc, size_t __len, mbstate_t* __ps, locale_t __l) {
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   return wcsnrtombs(__dest, __src, __nwc, __len, __ps);
 }
 
 inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_wcrtomb_l(char* __s, wchar_t __wc, mbstate_t* __ps, locale_t __l) {
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   return wcrtomb(__s, __wc, __ps);
 }
 
 inline _LIBCPP_HIDE_FROM_ABI size_t
 __libcpp_mbsnrtowcs_l(wchar_t* __dest, const char** __src, size_t __nms, size_t __len, mbstate_t* __ps, locale_t __l) {
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   return mbsnrtowcs(__dest, __src, __nms, __len, __ps);
 }
 
 inline _LIBCPP_HIDE_FROM_ABI size_t
 __libcpp_mbrtowc_l(wchar_t* __pwc, const char* __s, size_t __n, mbstate_t* __ps, locale_t __l) {
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   return mbrtowc(__pwc, __s, __n, __ps);
 }
 
 inline _LIBCPP_HIDE_FROM_ABI int __libcpp_mbtowc_l(wchar_t* __pwc, const char* __pmb, size_t __max, locale_t __l) {
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   return mbtowc(__pwc, __pmb, __max);
 }
 
 inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_mbrlen_l(const char* __s, size_t __n, mbstate_t* __ps, locale_t __l) {
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   return mbrlen(__s, __n, __ps);
 }
 #endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
 
 inline _LIBCPP_HIDE_FROM_ABI lconv* __libcpp_localeconv_l(locale_t __l) {
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   return localeconv();
 }
 
 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
 inline _LIBCPP_HIDE_FROM_ABI size_t
 __libcpp_mbsrtowcs_l(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, locale_t __l) {
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   return mbsrtowcs(__dest, __src, __len, __ps);
 }
 #endif
@@ -95,7 +95,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __libcpp_snprintf_l(
     char* __s, size_t __n, locale_t __l, const char* __format, ...) {
   va_list __va;
   va_start(__va, __format);
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   int __res = vsnprintf(__s, __n, __format, __va);
   va_end(__va);
   return __res;
@@ -105,7 +105,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __libcpp_asprintf_l(
     char** __s, locale_t __l, const char* __format, ...) {
   va_list __va;
   va_start(__va, __format);
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   int __res = vasprintf(__s, __format, __va);
   va_end(__va);
   return __res;
@@ -115,7 +115,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l(
     const char* __s, locale_t __l, const char* __format, ...) {
   va_list __va;
   va_start(__va, __format);
-  __libcpp_locale_guard __current(__l);
+  __locale_guard __current(__l);
   int __res = vsscanf(__s, __format, __va);
   va_end(__va);
   return __res;
@@ -123,4 +123,4 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l(
 
 _LIBCPP_END_NAMESPACE_STD
 
-#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H
+#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H
diff --git a/libcxx/include/__locale_dir/locale_base_api/freebsd.h b/libcxx/include/__locale_dir/locale_base_api/freebsd.h
new file mode 100644
index 000000000000000..45ecf1977471b85
--- /dev/null
+++ b/libcxx/include/__locale_dir/locale_base_api/freebsd.h
@@ -0,0 +1,15 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FREEBSD_H
+#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FREEBSD_H
+
+#include <xlocale.h>
+
+#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FREEBSD_H
diff --git a/libcxx/include/__locale_dir/locale_base_api/fuchsia.h b/libcxx/include/__locale_dir/locale_base_api/fuchsia.h
index 4c3440f981c6d08..f6ef454ba7ada75 100644
--- a/libcxx/include/__locale_dir/locale_base_api/fuchsia.h
+++ b/libcxx/include/__locale_dir/locale_base_api/fuchsia.h
@@ -7,12 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H
-#define _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H
+#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FUCHSIA_H
+#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FUCHSIA_H
 
 #include <__support/xlocale/__posix_l_fallback.h>
 #include <__support/xlocale/__strtonum_fallback.h>
 #include <cstdlib>
 #include <cwchar>
 
-#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H
+#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_FUCHSIA_H
diff --git a/libcxx/include/__locale_dir/locale_base_api/ibm.h b/libcxx/include/__locale_dir/locale_base_api/ibm.h
index fa3bc1c3633f5dc..1d1d15df9f7995e 100644
--- a/libcxx/include/__locale_dir/locale_base_api/ibm.h
+++ b/libcxx/include/__locale_dir/locale_base_api/ibm.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_IBM_H
-#define _LIBCPP___LOCALE_LOCALE_BASE_API_IBM_H
+#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_IBM_H
+#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_IBM_H
 
 #if defined(__MVS__)
 #  include <__support/ibm/locale_mgmt_zos.h>
@@ -105,4 +105,4 @@ _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 0) int vasprintf(char** strp, const char
   return str_size;
 }
 
-#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_IBM_H
+#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_IBM_H
diff --git a/libcxx/include/__locale_dir/locale_base_api/musl.h b/libcxx/include/__locale_dir/locale_base_api/musl.h
index bf7b849d5863421..1653214cdba1e39 100644
--- a/libcxx/include/__locale_dir/locale_base_api/musl.h
+++ b/libcxx/include/__locale_dir/locale_base_api/musl.h
@@ -14,8 +14,8 @@
 // in Musl.
 //===----------------------------------------------------------------------===//
 
-#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H
-#define _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H
+#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H
+#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H
 
 #include <cstdlib>
 #include <cwchar>
@@ -28,4 +28,4 @@ inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, c
   return ::strtoull(__nptr, __endptr, __base);
 }
 
-#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H
+#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H
diff --git a/libcxx/include/__locale_dir/locale_base_api/openbsd.h b/libcxx/include/__locale_dir/locale_base_api/openbsd.h
index 0c05d6a0f788747..d4fb224e0c80a09 100644
--- a/libcxx/include/__locale_dir/locale_base_api/openbsd.h
+++ b/libcxx/include/__locale_dir/locale_base_api/openbsd.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H
-#define _LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H
+#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_OPENBSD_H
+#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_OPENBSD_H
 
 #include <__support/xlocale/__strtonum_fallback.h>
 #include <clocale>
@@ -16,4 +16,4 @@
 #include <ctype.h>
 #include <cwctype>
 
-#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H
+#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_OPENBSD_H
diff --git a/libcxx/include/__locale_dir/locale_base_api/win32.h b/libcxx/include/__locale_dir/locale_base_api/win32.h
index f66baffb6920456..f488a0dc0d69b3f 100644
--- a/libcxx/include/__locale_dir/locale_base_api/win32.h
+++ b/libcxx/include/__locale_dir/locale_base_api/win32.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H
-#define _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H
+#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_WIN32_H
+#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_WIN32_H
 
 #include <__config>
 #include <cstddef>
@@ -232,4 +232,4 @@ _LIBCPP_EXPORTED_FROM_ABI int vasprintf_l(char** __ret, locale_t __loc, const ch
 // not-so-pressing FIXME: use locale to determine blank characters
 inline int iswblank_l(wint_t __c, locale_t /*loc*/) { return (__c == L' ' || __c == L'\t'); }
 
-#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H
+#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_WIN32_H
diff --git a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h b/libcxx/include/__locale_dir/locale_guard.h
similarity index 73%
rename from libcxx/include/__locale_dir/locale_base_api/locale_guard.h
rename to libcxx/include/__locale_dir/locale_guard.h
index 2baacb51cd06555..e0c414c001c41f1 100644
--- a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h
+++ b/libcxx/include/__locale_dir/locale_guard.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H
-#define _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H
+#ifndef _LIBCPP___LOCALE_DIR_LOCALE_GUARD_H
+#define _LIBCPP___LOCALE_DIR_LOCALE_GUARD_H
 
 #include <__config>
 #include <__locale> // for locale_t
@@ -19,23 +19,9 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-#if !defined(_LIBCPP_LOCALE__L_EXTENSIONS)
-struct __libcpp_locale_guard {
-  _LIBCPP_HIDE_FROM_ABI __libcpp_locale_guard(locale_t& __loc) : __old_loc_(uselocale(__loc)) {}
-
-  _LIBCPP_HIDE_FROM_ABI ~__libcpp_locale_guard() {
-    if (__old_loc_)
-      uselocale(__old_loc_);
-  }
-
-  locale_t __old_loc_;
-
-  __libcpp_locale_guard(__libcpp_locale_guard const&)            = delete;
-  __libcpp_locale_guard& operator=(__libcpp_locale_guard const&) = delete;
-};
-#elif defined(_LIBCPP_MSVCRT_LIKE)
-struct __libcpp_locale_guard {
-  __libcpp_locale_guard(locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) {
+#if defined(_LIBCPP_MSVCRT_LIKE)
+struct __locale_guard {
+  __locale_guard(locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) {
     // Setting the locale can be expensive even when the locale given is
     // already the current locale, so do an explicit check to see if the
     // current locale is already the one we want.
@@ -51,7 +37,7 @@ struct __libcpp_locale_guard {
       __setlocale(__l.__get_locale());
     }
   }
-  ~__libcpp_locale_guard() {
+  ~__locale_guard() {
     // The CRT documentation doesn't explicitly say, but setlocale() does the
     // right thing when given a semicolon-separated list of locale settings
     // for the different categories in the same format as returned by
@@ -71,8 +57,22 @@ struct __libcpp_locale_guard {
   int __status;
   char* __locale_all = nullptr;
 };
+#else
+struct __locale_guard {
+  _LIBCPP_HIDE_FROM_ABI __locale_guard(locale_t& __loc) : __old_loc_(uselocale(__loc)) {}
+
+  _LIBCPP_HIDE_FROM_ABI ~__locale_guard() {
+    if (__old_loc_)
+      uselocale(__old_loc_);
+  }
+
+  locale_t __old_loc_;
+
+  __locale_guard(__locale_guard const&)            = delete;
+  __locale_guard& operator=(__locale_guard const&) = delete;
+};
 #endif
 
 _LIBCPP_END_NAMESPACE_STD
 
-#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H
+#endif // _LIBCPP___LOCALE_DIR_LOCALE_GUARD_H
diff --git a/libcxx/include/__memory/uninitialized_algorithms.h b/libcxx/include/__memory/uninitialized_algorithms.h
index 54af1fa1a1cc55d..3fa948ecc43cff1 100644
--- a/libcxx/include/__memory/uninitialized_algorithms.h
+++ b/libcxx/include/__memory/uninitialized_algorithms.h
@@ -638,7 +638,8 @@ __uninitialized_allocator_relocate(_Alloc& __alloc, _Tp* __first, _Tp* __last, _
     __guard.__complete();
     std::__allocator_destroy(__alloc, __first, __last);
   } else {
-    __builtin_memcpy(__result, __first, sizeof(_Tp) * (__last - __first));
+    // Casting to void* to suppress clang complaining that this is technically UB.
+    __builtin_memcpy(static_cast<void*>(__result), __first, sizeof(_Tp) * (__last - __first));
   }
 }
 
diff --git a/libcxx/include/__support/xlocale/__posix_l_fallback.h b/libcxx/include/__support/xlocale/__posix_l_fallback.h
index 8a3a6f27f48dde9..c83589181747094 100644
--- a/libcxx/include/__support/xlocale/__posix_l_fallback.h
+++ b/libcxx/include/__support/xlocale/__posix_l_fallback.h
@@ -25,24 +25,10 @@
 #  include <wctype.h>
 #endif
 
-inline _LIBCPP_HIDE_FROM_ABI int isalnum_l(int __c, locale_t) { return ::isalnum(__c); }
-
-inline _LIBCPP_HIDE_FROM_ABI int isalpha_l(int __c, locale_t) { return ::isalpha(__c); }
-
-inline _LIBCPP_HIDE_FROM_ABI int iscntrl_l(int __c, locale_t) { return ::iscntrl(__c); }
-
 inline _LIBCPP_HIDE_FROM_ABI int isdigit_l(int __c, locale_t) { return ::isdigit(__c); }
 
-inline _LIBCPP_HIDE_FROM_ABI int isgraph_l(int __c, locale_t) { return ::isgraph(__c); }
-
 inline _LIBCPP_HIDE_FROM_ABI int islower_l(int __c, locale_t) { return ::islower(__c); }
 
-inline _LIBCPP_HIDE_FROM_ABI int isprint_l(int __c, locale_t) { return ::isprint(__c); }
-
-inline _LIBCPP_HIDE_FROM_ABI int ispunct_l(int __c, locale_t) { return ::ispunct(__c); }
-
-inline _LIBCPP_HIDE_FROM_ABI int isspace_l(int __c, locale_t) { return ::isspace(__c); }
-
 inline _LIBCPP_HIDE_FROM_ABI int isupper_l(int __c, locale_t) { return ::isupper(__c); }
 
 inline _LIBCPP_HIDE_FROM_ABI int isxdigit_l(int __c, locale_t) { return ::isxdigit(__c); }
@@ -52,8 +38,6 @@ inline _LIBCPP_HIDE_FROM_ABI int toupper_l(int __c, locale_t) { return ::toupper
 inline _LIBCPP_HIDE_FROM_ABI int tolower_l(int __c, locale_t) { return ::tolower(__c); }
 
 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
-inline _LIBCPP_HIDE_FROM_ABI int iswalnum_l(wint_t __c, locale_t) { return ::iswalnum(__c); }
-
 inline _LIBCPP_HIDE_FROM_ABI int iswalpha_l(wint_t __c, locale_t) { return ::iswalpha(__c); }
 
 inline _LIBCPP_HIDE_FROM_ABI int iswblank_l(wint_t __c, locale_t) { return ::iswblank(__c); }
@@ -62,8 +46,6 @@ inline _LIBCPP_HIDE_FROM_ABI int iswcntrl_l(wint_t __c, locale_t) { return ::isw
 
 inline _LIBCPP_HIDE_FROM_ABI int iswdigit_l(wint_t __c, locale_t) { return ::iswdigit(__c); }
 
-inline _LIBCPP_HIDE_FROM_ABI int iswgraph_l(wint_t __c, locale_t) { return ::iswgraph(__c); }
-
 inline _LIBCPP_HIDE_FROM_ABI int iswlower_l(wint_t __c, locale_t) { return ::iswlower(__c); }
 
 inline _LIBCPP_HIDE_FROM_ABI int iswprint_l(wint_t __c, locale_t) { return ::iswprint(__c); }
diff --git a/libcxx/include/__type_traits/desugars_to.h b/libcxx/include/__type_traits/desugars_to.h
index b0ce7c414e5d778..452c70bfbad66de 100644
--- a/libcxx/include/__type_traits/desugars_to.h
+++ b/libcxx/include/__type_traits/desugars_to.h
@@ -25,6 +25,12 @@ struct __equal_tag {};
 // syntactically, the operation is equivalent to calling `a + b`
 struct __plus_tag {};
 
+// syntactically, the operation is equivalent to calling `a < b`
+struct __less_tag {};
+
+// syntactically, the operation is equivalent to calling `a > b`
+struct __greater_tag {};
+
 // syntactically, the operation is equivalent to calling `a < b`, and these expressions
 // have to be true for any `a` and `b`:
 // - `(a < b) == (b > a)`
diff --git a/libcxx/include/__type_traits/is_trivially_copyable.h b/libcxx/include/__type_traits/is_trivially_copyable.h
index e92af126ee94d93..8eb3ba7581af159 100644
--- a/libcxx/include/__type_traits/is_trivially_copyable.h
+++ b/libcxx/include/__type_traits/is_trivially_copyable.h
@@ -27,10 +27,8 @@ template <class _Tp>
 inline constexpr bool is_trivially_copyable_v = __is_trivially_copyable(_Tp);
 #endif
 
-#if _LIBCPP_STD_VER >= 20
 template <class _Tp>
-inline constexpr bool __is_cheap_to_copy = is_trivially_copyable_v<_Tp> && sizeof(_Tp) <= sizeof(std::intmax_t);
-#endif
+inline const bool __is_cheap_to_copy = __is_trivially_copyable(_Tp) && sizeof(_Tp) <= sizeof(std::intmax_t);
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/libcxx/include/__utility/exception_guard.h b/libcxx/include/__utility/exception_guard.h
index a03bd7e8f352277..00b835d3e2a2fc3 100644
--- a/libcxx/include/__utility/exception_guard.h
+++ b/libcxx/include/__utility/exception_guard.h
@@ -137,6 +137,12 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __exception_guard<_Rollback> __make_exce
   return __exception_guard<_Rollback>(std::move(__rollback));
 }
 
+template <class _Rollback>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __exception_guard_exceptions<_Rollback>
+__make_scope_guard(_Rollback __rollback) {
+  return __exception_guard_exceptions<_Rollback>(std::move(__rollback));
+}
+
 _LIBCPP_END_NAMESPACE_STD
 
 _LIBCPP_POP_MACROS
diff --git a/libcxx/include/__vector/vector.h b/libcxx/include/__vector/vector.h
index 7889e8c2201ac19..844e5d6a2105687 100644
--- a/libcxx/include/__vector/vector.h
+++ b/libcxx/include/__vector/vector.h
@@ -165,10 +165,12 @@ class _LIBCPP_TEMPLATE_VIS vector {
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI
   vector(size_type __n, const value_type& __x, const allocator_type& __a)
       : __alloc_(__a) {
+    auto __guard = std::__make_exception_guard(__destroy_vector(*this));
     if (__n > 0) {
       __vallocate(__n);
       __construct_at_end(__n, __x);
     }
+    __guard.__complete();
   }
 
   template <class _InputIterator,
diff --git a/libcxx/include/ccomplex b/libcxx/include/ccomplex
index 94d2c8d7d003d4f..d379c9e7f0174a9 100644
--- a/libcxx/include/ccomplex
+++ b/libcxx/include/ccomplex
@@ -23,4 +23,16 @@
 #  pragma GCC system_header
 #endif
 
+#if _LIBCPP_STD_VER >= 20
+
+using __standard_header_ccomplex _LIBCPP_DEPRECATED_("removed in C++20. Include <complex> instead.") = void;
+using __use_standard_header_ccomplex = __standard_header_ccomplex;
+
+#elif _LIBCPP_STD_VER >= 17
+
+using __standard_header_ccomplex _LIBCPP_DEPRECATED_("Include <complex> instead.") = void;
+using __use_standard_header_ccomplex                                               = __standard_header_ccomplex;
+
+#endif
+
 #endif // _LIBCPP_CCOMPLEX
diff --git a/libcxx/include/ciso646 b/libcxx/include/ciso646
index 1d859f08fac572e..5fcac79e38a7f2f 100644
--- a/libcxx/include/ciso646
+++ b/libcxx/include/ciso646
@@ -21,4 +21,11 @@
 #  pragma GCC system_header
 #endif
 
+#if _LIBCPP_STD_VER >= 20
+
+using __standard_header_ciso646 _LIBCPP_DEPRECATED_("removed in C++20. Include <version> instead.") = void;
+using __use_standard_header_ciso646 = __standard_header_ciso646;
+
+#endif
+
 #endif // _LIBCPP_CISO646
diff --git a/libcxx/include/complex b/libcxx/include/complex
index 4030d96b003d568..15e42800fbfa0a6 100644
--- a/libcxx/include/complex
+++ b/libcxx/include/complex
@@ -1097,20 +1097,20 @@ inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> pow(const complex<_Tp>& __x, const com
   return std::exp(__y * std::log(__x));
 }
 
-template <class _Tp, class _Up>
+template <class _Tp, class _Up, __enable_if_t<is_floating_point<_Tp>::value && is_floating_point<_Up>::value, int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI complex<typename __promote<_Tp, _Up>::type>
 pow(const complex<_Tp>& __x, const complex<_Up>& __y) {
   typedef complex<typename __promote<_Tp, _Up>::type> result_type;
   return std::pow(result_type(__x), result_type(__y));
 }
 
-template <class _Tp, class _Up, __enable_if_t<is_arithmetic<_Up>::value, int> = 0>
+template <class _Tp, class _Up, __enable_if_t<is_floating_point<_Tp>::value && is_arithmetic<_Up>::value, int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI complex<typename __promote<_Tp, _Up>::type> pow(const complex<_Tp>& __x, const _Up& __y) {
   typedef complex<typename __promote<_Tp, _Up>::type> result_type;
   return std::pow(result_type(__x), result_type(__y));
 }
 
-template <class _Tp, class _Up, __enable_if_t<is_arithmetic<_Tp>::value, int> = 0>
+template <class _Tp, class _Up, __enable_if_t<is_arithmetic<_Tp>::value && is_floating_point<_Up>::value, int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI complex<typename __promote<_Tp, _Up>::type> pow(const _Tp& __x, const complex<_Up>& __y) {
   typedef complex<typename __promote<_Tp, _Up>::type> result_type;
   return std::pow(result_type(__x), result_type(__y));
diff --git a/libcxx/include/complex.h b/libcxx/include/complex.h
index a3da21c843f3650..89595ae2068a641 100644
--- a/libcxx/include/complex.h
+++ b/libcxx/include/complex.h
@@ -24,7 +24,7 @@
 #endif
 
 #ifdef __cplusplus
-#  include <ccomplex>
+#  include <complex>
 #elif __has_include_next(<complex.h>)
 #  include_next <complex.h>
 #endif
diff --git a/libcxx/include/cstdalign b/libcxx/include/cstdalign
new file mode 100644
index 000000000000000..e6a2a3c71774220
--- /dev/null
+++ b/libcxx/include/cstdalign
@@ -0,0 +1,55 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP_CSTDALIGN
+#define _LIBCPP_CSTDALIGN
+
+/*
+    cstdalign synopsis
+
+Macros:
+
+    __alignas_is_defined
+    __alignof_is_defined
+
+*/
+
+#include <__config>
+
+// <stdalign.h> is not provided by libc++
+#if __has_include(<stdalign.h>)
+#  include <stdalign.h>
+#  ifdef _LIBCPP_STDALIGN_H
+#    error "If libc++ starts defining <stdalign.h>, the __has_include check should move to libc++'s <stdalign.h>"
+#  endif
+#endif
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#undef __alignas_is_defined
+#define __alignas_is_defined 1
+
+#undef __alignof_is_defined
+#define __alignof_is_defined 1
+
+#if _LIBCPP_STD_VER >= 20
+
+using __standard_header_cstdalign _LIBCPP_DEPRECATED_("removed in C++20.") = void;
+using __use_standard_header_cstdalign                                      = __standard_header_cstdalign;
+
+#elif _LIBCPP_STD_VER >= 17
+
+using __standard_header_cstdalign _LIBCPP_DEPRECATED = void;
+using __use_standard_header_cstdalign                = __standard_header_cstdalign;
+
+#endif
+
+#endif // _LIBCPP_CSTDALIGN
diff --git a/libcxx/include/cstdbool b/libcxx/include/cstdbool
index ef731c021a4ab88..1d627258e10c094 100644
--- a/libcxx/include/cstdbool
+++ b/libcxx/include/cstdbool
@@ -28,4 +28,16 @@ Macros:
 #undef __bool_true_false_are_defined
 #define __bool_true_false_are_defined 1
 
+#if _LIBCPP_STD_VER >= 20
+
+using __standard_header_cstdbool _LIBCPP_DEPRECATED_("removed in C++20.") = void;
+using __use_standard_header_cstdbool                                      = __standard_header_cstdbool;
+
+#elif _LIBCPP_STD_VER >= 17
+
+using __standard_header_cstdbool _LIBCPP_DEPRECATED = void;
+using __use_standard_header_cstdbool                = __standard_header_cstdbool;
+
+#endif
+
 #endif // _LIBCPP_CSTDBOOL
diff --git a/libcxx/include/ctgmath b/libcxx/include/ctgmath
index 6237979be4906cc..7dbe952f021b74f 100644
--- a/libcxx/include/ctgmath
+++ b/libcxx/include/ctgmath
@@ -18,11 +18,23 @@
 
 */
 
-#include <ccomplex>
 #include <cmath>
+#include <complex>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
 
+#if _LIBCPP_STD_VER >= 20
+
+using __standard_header_ctgmath _LIBCPP_DEPRECATED_("removed in C++20. Include <cmath> and <complex> instead.") = void;
+using __use_standard_header_ctgmath = __standard_header_ctgmath;
+
+#elif _LIBCPP_STD_VER >= 17
+
+using __standard_header_ctgmath _LIBCPP_DEPRECATED_("Include <cmath> and <complex> instead.") = void;
+using __use_standard_header_ctgmath = __standard_header_ctgmath;
+
+#endif
+
 #endif // _LIBCPP_CTGMATH
diff --git a/libcxx/include/flat_map b/libcxx/include/flat_map
new file mode 100644
index 000000000000000..15d79dd1ddca34b
--- /dev/null
+++ b/libcxx/include/flat_map
@@ -0,0 +1,54 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP_FLAT_MAP
+#define _LIBCPP_FLAT_MAP
+
+/*
+  Header <flat_map> synopsis
+
+#include <compare>              // see [compare.syn]
+#include <initializer_list>     // see [initializer.list.syn]
+
+namespace std {
+  // [flat.map], class template flat_map
+  template<class Key, class T, class Compare = less<Key>,
+           class KeyContainer = vector<Key>, class MappedContainer = vector<T>>
+    class flat_map;
+
+  struct sorted_unique_t { explicit sorted_unique_t() = default; };
+  inline constexpr sorted_unique_t sorted_unique{};
+
+  template<class Key, class T, class Compare, class KeyContainer, class MappedContainer,
+           class Allocator>
+    struct uses_allocator<flat_map<Key, T, Compare, KeyContainer, MappedContainer>,
+                          Allocator>;
+
+  // [flat.map.erasure], erasure for flat_map
+  template<class Key, class T, class Compare, class KeyContainer, class MappedContainer,
+           class Predicate>
+    typename flat_map<Key, T, Compare, KeyContainer, MappedContainer>::size_type
+      erase_if(flat_map<Key, T, Compare, KeyContainer, MappedContainer>& c, Predicate pred);
+*/
+
+#include <__assert> // all public C++ headers provide the assertion handler
+#include <__config>
+#include <__flat_map/flat_map.h>
+#include <__flat_map/sorted_unique.h>
+#include <version>
+
+// standard required includes
+#include <compare>
+#include <initializer_list>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#endif // _LIBCPP_FLAT_MAP
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index b429d7cff702b81..af8c3c15eb2767f 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -1109,6 +1109,11 @@ module std [system] {
     export *
   }
 
+  module cstdalign {
+    header "cstdalign"
+    export *
+  }
+
   module cstdarg {
     header "cstdarg"
     export *
@@ -1222,6 +1227,14 @@ module std [system] {
     export *
   }
 
+  module flat_map {
+    module flat_map                       { header "__flat_map/flat_map.h" }
+    module sorted_unique                  { header "__flat_map/sorted_unique.h" }
+
+    header "flat_map"
+    export *
+  }
+
   module format {
     module buffer                             { header "__format/buffer.h" }
     module concepts                           { header "__format/concepts.h" }
@@ -1443,15 +1456,16 @@ module std [system] {
   module locale {
     header "locale"
     header "__locale_dir/locale_base_api.h"
-    header "__locale_dir/locale_base_api/locale_guard.h"
+    header "__locale_dir/locale_guard.h"
     module locale_base_api {
       textual header "__locale_dir/locale_base_api/android.h"
+      textual header "__locale_dir/locale_base_api/apple.h"
       textual header "__locale_dir/locale_base_api/bsd_locale_defaults.h"
       textual header "__locale_dir/locale_base_api/bsd_locale_fallbacks.h"
+      textual header "__locale_dir/locale_base_api/freebsd.h"
       textual header "__locale_dir/locale_base_api/fuchsia.h"
       textual header "__locale_dir/locale_base_api/ibm.h"
       textual header "__locale_dir/locale_base_api/musl.h"
-      textual header "__locale_dir/locale_base_api/newlib.h"
       textual header "__locale_dir/locale_base_api/openbsd.h"
       textual header "__locale_dir/locale_base_api/win32.h"
     }
diff --git a/libcxx/include/tgmath.h b/libcxx/include/tgmath.h
index e6f0a4ab2611fa8..1c5058cb065a91c 100644
--- a/libcxx/include/tgmath.h
+++ b/libcxx/include/tgmath.h
@@ -24,7 +24,8 @@
 #endif
 
 #ifdef __cplusplus
-#  include <ctgmath>
+#  include <cmath>
+#  include <complex>
 #else
 #  if __has_include_next(<tgmath.h>)
 #    include_next <tgmath.h>
diff --git a/libcxx/include/version b/libcxx/include/version
index 5ab4f28a04d880e..cb75f3b2db681c7 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -174,6 +174,7 @@ __cpp_lib_nonmember_container_access                    201411L <array> <deque>
 __cpp_lib_not_fn                                        201603L <functional>
 __cpp_lib_null_iterators                                201304L <iterator>
 __cpp_lib_optional                                      202110L <optional>
+                                                        202106L // C++20
                                                         201606L // C++17
 __cpp_lib_optional_range_support                        202406L <optional>
 __cpp_lib_out_ptr                                       202311L <memory>
@@ -261,7 +262,9 @@ __cpp_lib_uncaught_exceptions                           201411L <exception>
 __cpp_lib_unordered_map_try_emplace                     201411L <unordered_map>
 __cpp_lib_unreachable                                   202202L <utility>
 __cpp_lib_unwrap_ref                                    201811L <functional>
-__cpp_lib_variant                                       202102L <variant>
+__cpp_lib_variant                                       202306L <variant>
+                                                        202106L // C++20
+                                                        202102L // C++17
 __cpp_lib_void_t                                        201411L <type_traits>
 
 */
@@ -427,6 +430,8 @@ __cpp_lib_void_t                                        201411L <type_traits>
 # define __cpp_lib_list_remove_return_type              201806L
 # define __cpp_lib_math_constants                       201907L
 # define __cpp_lib_move_iterator_concept                202207L
+# undef  __cpp_lib_optional
+# define __cpp_lib_optional                             202106L
 # if _LIBCPP_AVAILABILITY_HAS_PMR
 #   define __cpp_lib_polymorphic_allocator              201902L
 # endif
@@ -453,6 +458,8 @@ __cpp_lib_void_t                                        201411L <type_traits>
 # define __cpp_lib_to_array                             201907L
 # define __cpp_lib_type_identity                        201806L
 # define __cpp_lib_unwrap_ref                           201811L
+# undef  __cpp_lib_variant
+# define __cpp_lib_variant                              202106L
 #endif
 
 #if _LIBCPP_STD_VER >= 23
@@ -570,6 +577,8 @@ __cpp_lib_void_t                                        201411L <type_traits>
 // # define __cpp_lib_to_string                            202306L
 # undef  __cpp_lib_tuple_like
 // # define __cpp_lib_tuple_like                           202311L
+# undef  __cpp_lib_variant
+# define __cpp_lib_variant                              202306L
 #endif
 
 // clang-format on
diff --git a/libcxx/modules/std.compat.cppm.in b/libcxx/modules/std.compat.cppm.in
index 797b413f68e2724..fbc2c7d94cfabb3 100644
--- a/libcxx/modules/std.compat.cppm.in
+++ b/libcxx/modules/std.compat.cppm.in
@@ -53,9 +53,6 @@ module;
 #  if __has_include(<debugging>)
 #    error "please update the header information for <debugging> in headers_not_available in utils/libcxx/header_information.py"
 #  endif // __has_include(<debugging>)
-#  if __has_include(<flat_map>)
-#    error "please update the header information for <flat_map> in headers_not_available in utils/libcxx/header_information.py"
-#  endif // __has_include(<flat_map>)
 #  if __has_include(<flat_set>)
 #    error "please update the header information for <flat_set> in headers_not_available in utils/libcxx/header_information.py"
 #  endif // __has_include(<flat_set>)
diff --git a/libcxx/modules/std.cppm.in b/libcxx/modules/std.cppm.in
index 64ed8d4088cc018..b4889e5a69e49bd 100644
--- a/libcxx/modules/std.cppm.in
+++ b/libcxx/modules/std.cppm.in
@@ -64,6 +64,7 @@ module;
 #include <execution>
 #include <expected>
 #include <filesystem>
+#include <flat_map>
 #include <format>
 #include <forward_list>
 #if !defined(_LIBCPP_HAS_NO_LOCALIZATION)
@@ -161,9 +162,6 @@ module;
 #  if __has_include(<debugging>)
 #    error "please update the header information for <debugging> in headers_not_available in utils/libcxx/header_information.py"
 #  endif // __has_include(<debugging>)
-#  if __has_include(<flat_map>)
-#    error "please update the header information for <flat_map> in headers_not_available in utils/libcxx/header_information.py"
-#  endif // __has_include(<flat_map>)
 #  if __has_include(<flat_set>)
 #    error "please update the header information for <flat_set> in headers_not_available in utils/libcxx/header_information.py"
 #  endif // __has_include(<flat_set>)
diff --git a/libcxx/modules/std/flat_map.inc b/libcxx/modules/std/flat_map.inc
index 83cd20ad6189460..6a86229bceaba9d 100644
--- a/libcxx/modules/std/flat_map.inc
+++ b/libcxx/modules/std/flat_map.inc
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 export namespace std {
-#if 0
-  // [flat.map], class template flat_­map
+#if _LIBCPP_STD_VER >= 23
+  // [flat.map], class template flat_map
   using std::flat_map;
 
   using std::sorted_unique;
@@ -17,15 +17,17 @@ export namespace std {
 
   using std::uses_allocator;
 
-  // [flat.map.erasure], erasure for flat_­map
+  // [flat.map.erasure], erasure for flat_map
   using std::erase_if;
 
-  // [flat.multimap], class template flat_­multimap
+#endif // _LIBCPP_STD_VER >= 23
+#if 0
+  // [flat.multimap], class template flat_multimap
   using std::flat_multimap;
 
   using std::sorted_equivalent;
   using std::sorted_equivalent_t;
 
-  // [flat.multimap.erasure], erasure for flat_­multimap
+  // [flat.multimap.erasure], erasure for flat_multimap
 #endif
 } // namespace std
diff --git a/libcxx/src/algorithm.cpp b/libcxx/src/algorithm.cpp
index af9d60a8e271e84..a7c39b5e5183a4d 100644
--- a/libcxx/src/algorithm.cpp
+++ b/libcxx/src/algorithm.cpp
@@ -21,8 +21,7 @@ void __sort(RandomAccessIterator first, RandomAccessIterator last, Comp comp) {
   std::__introsort<_ClassicAlgPolicy,
                    ranges::less,
                    RandomAccessIterator,
-                   __use_branchless_sort<ranges::less, RandomAccessIterator>::value>(
-      first, last, ranges::less{}, depth_limit);
+                   __use_branchless_sort<ranges::less, RandomAccessIterator>>(first, last, ranges::less{}, depth_limit);
 }
 
 // clang-format off
diff --git a/libcxx/src/iostream.cpp b/libcxx/src/iostream.cpp
index c5ad77a01916084..48d2fdb866a332c 100644
--- a/libcxx/src/iostream.cpp
+++ b/libcxx/src/iostream.cpp
@@ -12,7 +12,7 @@
 #include <string>
 
 #ifdef _LIBCPP_MSVCRT_LIKE
-#  include <__locale_dir/locale_base_api/locale_guard.h>
+#  include <__locale_dir/locale_guard.h>
 #endif
 
 #define _str(s) #s
@@ -109,7 +109,7 @@ static void force_locale_initialization() {
   static bool once = []() {
     auto loc = newlocale(LC_ALL_MASK, "C", 0);
     {
-      __libcpp_locale_guard g(loc); // forces initialization of locale TLS
+      __locale_guard g(loc); // forces initialization of locale TLS
       ((void)g);
     }
     freelocale(loc);
diff --git a/libcxx/src/support/win32/locale_win32.cpp b/libcxx/src/support/win32/locale_win32.cpp
index 57ef94932ba0a76..2a08e97b8645b40 100644
--- a/libcxx/src/support/win32/locale_win32.cpp
+++ b/libcxx/src/support/win32/locale_win32.cpp
@@ -11,12 +11,10 @@
 #include <memory>
 #include <type_traits>
 
-#include <__locale_dir/locale_base_api/locale_guard.h>
+#include <__locale_dir/locale_guard.h>
 
 int __libcpp_vasprintf(char** sptr, const char* __restrict fmt, va_list ap);
 
-using std::__libcpp_locale_guard;
-
 // FIXME: base and mask currently unused. Needs manual work to construct the new locale
 locale_t newlocale(int /*mask*/, const char* locale, locale_t /*base*/) {
   return {_create_locale(LC_ALL, locale), locale};
@@ -26,33 +24,33 @@ decltype(MB_CUR_MAX) MB_CUR_MAX_L(locale_t __l) {
 #if defined(_LIBCPP_MSVCRT)
   return ___mb_cur_max_l_func(__l);
 #else
-  __libcpp_locale_guard __current(__l);
+  std::__locale_guard __current(__l);
   return MB_CUR_MAX;
 #endif
 }
 
 lconv* localeconv_l(locale_t& loc) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   lconv* lc = localeconv();
   if (!lc)
     return lc;
   return loc.__store_lconv(lc);
 }
 size_t mbrlen_l(const char* __restrict s, size_t n, mbstate_t* __restrict ps, locale_t loc) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   return mbrlen(s, n, ps);
 }
 size_t
 mbsrtowcs_l(wchar_t* __restrict dst, const char** __restrict src, size_t len, mbstate_t* __restrict ps, locale_t loc) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   return mbsrtowcs(dst, src, len, ps);
 }
 size_t wcrtomb_l(char* __restrict s, wchar_t wc, mbstate_t* __restrict ps, locale_t loc) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   return wcrtomb(s, wc, ps);
 }
 size_t mbrtowc_l(wchar_t* __restrict pwc, const char* __restrict s, size_t n, mbstate_t* __restrict ps, locale_t loc) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   return mbrtowc(pwc, s, n, ps);
 }
 size_t mbsnrtowcs_l(wchar_t* __restrict dst,
@@ -61,7 +59,7 @@ size_t mbsnrtowcs_l(wchar_t* __restrict dst,
                     size_t len,
                     mbstate_t* __restrict ps,
                     locale_t loc) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   return mbsnrtowcs(dst, src, nms, len, ps);
 }
 size_t wcsnrtombs_l(char* __restrict dst,
@@ -70,15 +68,15 @@ size_t wcsnrtombs_l(char* __restrict dst,
                     size_t len,
                     mbstate_t* __restrict ps,
                     locale_t loc) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   return wcsnrtombs(dst, src, nwc, len, ps);
 }
 wint_t btowc_l(int c, locale_t loc) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   return btowc(c);
 }
 int wctob_l(wint_t c, locale_t loc) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   return wctob(c);
 }
 
@@ -90,7 +88,7 @@ int snprintf_l(char* ret, size_t n, locale_t loc, const char* format, ...) {
   int result = __stdio_common_vsprintf(
       _CRT_INTERNAL_LOCAL_PRINTF_OPTIONS | _CRT_INTERNAL_PRINTF_STANDARD_SNPRINTF_BEHAVIOR, ret, n, format, loc, ap);
 #else
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   _LIBCPP_DIAGNOSTIC_PUSH
   _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wformat-nonliteral")
   int result = vsnprintf(ret, n, format, ap);
@@ -108,25 +106,25 @@ int asprintf_l(char** ret, locale_t loc, const char* format, ...) {
   return result;
 }
 int vasprintf_l(char** ret, locale_t loc, const char* format, va_list ap) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   return __libcpp_vasprintf(ret, format, ap);
 }
 
 #if !defined(_LIBCPP_MSVCRT)
 float strtof_l(const char* nptr, char** endptr, locale_t loc) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   return strtof(nptr, endptr);
 }
 
 long double strtold_l(const char* nptr, char** endptr, locale_t loc) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   return strtold(nptr, endptr);
 }
 #endif
 
 #if defined(__MINGW32__) && __MSVCRT_VERSION__ < 0x0800
 size_t strftime_l(char* ret, size_t n, const char* format, const struct tm* tm, locale_t loc) {
-  __libcpp_locale_guard __current(loc);
+  std::__locale_guard __current(loc);
   return strftime(ret, n, format, tm);
 }
 #endif
diff --git a/libcxx/test/libcxx/assertions/modes/enabling_assertions_enables_extensive_mode.pass.cpp b/libcxx/test/libcxx/assertions/modes/enabling_assertions_enables_extensive_mode.pass.cpp
deleted file mode 100644
index c496fc32dc939f3..000000000000000
--- a/libcxx/test/libcxx/assertions/modes/enabling_assertions_enables_extensive_mode.pass.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// TODO(hardening): remove in LLVM 20.
-// This test ensures that enabling assertions with the legacy `_LIBCPP_ENABLE_ASSERTIONS` now enables the extensive
-// hardening mode.
-
-// `check_assertion.h` is only available starting from C++11 and requires Unix headers and regex support.
-// REQUIRES: has-unix-headers
-// UNSUPPORTED: c++03, no-localization
-// The ability to set a custom abort message is required to compare the assertion message (which only happens in the
-// debug mode).
-// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing
-// HWASAN replaces TRAP with abort or error exit code.
-// XFAIL: hwasan
-// Note that GCC doesn't support `-Wno-macro-redefined`.
-// ADDITIONAL_COMPILE_FLAGS: -U_LIBCPP_HARDENING_MODE -D_LIBCPP_ENABLE_ASSERTIONS=1 -Wno-#warnings -Wno-cpp
-
-#include <cassert>
-#include "check_assertion.h"
-
-int main(int, char**) {
-  static_assert(_LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_EXTENSIVE,
-                "The extensive hardening mode should be implicitly enabled");
-
-  _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(true, "Should not fire");
-  TEST_LIBCPP_ASSERT_FAILURE([] { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(false, "Should fire"); }(), "Should fire");
-
-  return 0;
-}
diff --git a/libcxx/test/libcxx/clang_modules_include.gen.py b/libcxx/test/libcxx/clang_modules_include.gen.py
index bc028f2a0809aa0..b897984f898819f 100644
--- a/libcxx/test/libcxx/clang_modules_include.gen.py
+++ b/libcxx/test/libcxx/clang_modules_include.gen.py
@@ -17,10 +17,15 @@
 
 import sys
 sys.path.append(sys.argv[1])
-from libcxx.header_information import lit_header_restrictions, public_headers
+from libcxx.header_information import (
+    lit_header_restrictions,
+    lit_header_undeprecations,
+    public_headers,
+)
 
 for header in public_headers:
-  print(f"""\
+    print(
+        f"""\
 //--- {header}.compile.pass.cpp
 // RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only
 
@@ -41,9 +46,11 @@
 // UNSUPPORTED: LIBCXX-PICOLIBC-FIXME
 
 {lit_header_restrictions.get(header, '')}
+{lit_header_undeprecations.get(header, '')}
 
 #include <{header}>
-""")
+"""
+    )
 
 print(
     f"""\
diff --git a/libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.input_range.pass.cpp b/libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.input_range.pass.cpp
new file mode 100644
index 000000000000000..2db803b53441f1d
--- /dev/null
+++ b/libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.input_range.pass.cpp
@@ -0,0 +1,67 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: has-unix-headers
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// UNSUPPORTED: libcpp-hardening-mode=none
+// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing
+
+// <flat_map>
+
+// flat_map(key_container_type , mapped_container_type , const key_compare& __comp = key_compare())
+// flat_map(const key_container_type& , const mapped_container_type& , const _Allocator& )
+// flat_map(const key_container_type& , const mapped_container_type& , const key_compare&, const _Allocator& )
+// void replace(key_container_type&& , mapped_container_type&&)
+//
+
+#include <flat_map>
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include "check_assertion.h"
+
+int main(int, char**) {
+  using M = std::flat_map<int, int>;
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] { M m({1, 2, 3}, {4}); }()), "flat_map keys and mapped containers have different size");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] { M m({1, 2, 3}, {4}, std::less<int>{}); }()), "flat_map keys and mapped containers have different size");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector keys{1, 2, 3};
+        const std::vector values{4};
+        const std::allocator<int> alloc{};
+        M m(keys, values, alloc);
+      }()),
+      "flat_map keys and mapped containers have different size");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector keys{1, 2, 3};
+        const std::vector values{4};
+        const std::less<int> key_compare{};
+        const std::allocator<int> alloc{};
+        M m(keys, values, key_compare, alloc);
+      }()),
+      "flat_map keys and mapped containers have different size");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        std::vector keys{1, 2, 3};
+        std::vector values{4};
+        M m;
+        m.replace(std::move(keys), std::move(values));
+      }()),
+      "flat_map keys and mapped containers have different size");
+
+  return 0;
+}
diff --git a/libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.sorted_unique.pass.cpp b/libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.sorted_unique.pass.cpp
new file mode 100644
index 000000000000000..e6bd3f385af9cb6
--- /dev/null
+++ b/libcxx/test/libcxx/containers/containers.adaptors/flat.map/assert.sorted_unique.pass.cpp
@@ -0,0 +1,225 @@
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: has-unix-headers
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// UNSUPPORTED: libcpp-hardening-mode=none
+// REQUIRES: libcpp-hardening-mode=debug
+// XFAIL: libcpp-hardening-mode=debug && availability-verbose_abort-missing
+
+// <flat_map>
+
+// flat_map(sorted_unique_t, key_container_type, mapped_container_type, ...) and its allocator-extended overloads
+// flat_map(sorted_unique_t, InputIterator, InputIterator, ...) and flat_map(sorted_unique_t, initializer_list, ...)
+// void insert(sorted_unique_t, InputIterator, InputIterator) and void insert(sorted_unique_t, initializer_list)
+// void replace(key_container_type&& , mapped_container_type&&)
+//
+
+#include <flat_map>
+#include <functional>
+#include <initializer_list>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "check_assertion.h"
+
+int main(int, char**) {
+  using M = std::flat_map<int, int>;
+
+  TEST_LIBCPP_ASSERT_FAILURE(([] { M m(std::sorted_unique, {2, 2, 3}, {4, 5, 6}); }()),
+                             "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(([] { M m(std::sorted_unique, {4, 2, 3}, {4, 5, 6}); }()),
+                             "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(([] { M m(std::sorted_unique, {2, 2, 3}, {4, 5, 6}, std::less<int>{}); }()),
+                             "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(([] { M m(std::sorted_unique, {4, 2, 3}, {4, 5, 6}, std::less<int>{}); }()),
+                             "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector keys{2, 2, 3};
+        const std::vector values{4, 5, 6};
+        const std::allocator<int> alloc{};
+        M m(std::sorted_unique, keys, values, alloc);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector keys{4, 2, 3};
+        const std::vector values{4, 5, 6};
+        const std::allocator<int> alloc{};
+        M m(std::sorted_unique, keys, values, alloc);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector keys{2, 2, 3};
+        const std::vector values{4, 5, 6};
+        const std::allocator<int> alloc{};
+        const std::less<int> comp{};
+        M m(std::sorted_unique, keys, values, comp, alloc);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector keys{4, 2, 3};
+        const std::vector values{4, 5, 6};
+        const std::allocator<int> alloc{};
+        const std::less<int> comp{};
+        M m(std::sorted_unique, keys, values, comp, alloc);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector<std::pair<int, int>> v{{2, 4}, {2, 5}, {3, 6}};
+        const std::less<int> comp{};
+        M m(std::sorted_unique, v.begin(), v.end(), comp);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector<std::pair<int, int>> v{{4, 4}, {2, 5}, {3, 6}};
+        const std::less<int> comp{};
+        M m(std::sorted_unique, v.begin(), v.end(), comp);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector<std::pair<int, int>> v{{2, 4}, {2, 5}, {3, 6}};
+        const std::less<int> comp{};
+        const std::allocator<int> alloc{};
+        M m(std::sorted_unique, v.begin(), v.end(), comp, alloc);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector<std::pair<int, int>> v{{4, 4}, {2, 5}, {3, 6}};
+        const std::less<int> comp{};
+        const std::allocator<int> alloc{};
+        M m(std::sorted_unique, v.begin(), v.end(), comp, alloc);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector<std::pair<int, int>> v{{2, 4}, {2, 5}, {3, 6}};
+        const std::allocator<int> alloc{};
+        M m(std::sorted_unique, v.begin(), v.end(), alloc);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector<std::pair<int, int>> v{{4, 4}, {2, 5}, {3, 6}};
+        const std::allocator<int> alloc{};
+        M m(std::sorted_unique, v.begin(), v.end(), alloc);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        std::initializer_list<std::pair<int, int>> v{{2, 4}, {2, 5}, {3, 6}};
+        const std::less<int> comp{};
+        M m(std::sorted_unique, v, comp);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        std::initializer_list<std::pair<int, int>> v{{4, 4}, {2, 5}, {3, 6}};
+        const std::less<int> comp{};
+        M m(std::sorted_unique, v, comp);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        std::initializer_list<std::pair<int, int>> v{{2, 4}, {2, 5}, {3, 6}};
+        const std::less<int> comp{};
+        const std::allocator<int> alloc{};
+        M m(std::sorted_unique, v, comp, alloc);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        std::initializer_list<std::pair<int, int>> v{{4, 4}, {2, 5}, {3, 6}};
+        const std::less<int> comp{};
+        const std::allocator<int> alloc{};
+        M m(std::sorted_unique, v, comp, alloc);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        std::initializer_list<std::pair<int, int>> v{{2, 4}, {2, 5}, {3, 6}};
+        const std::allocator<int> alloc{};
+        M m(std::sorted_unique, v, alloc);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        std::initializer_list<std::pair<int, int>> v{{4, 4}, {2, 5}, {3, 6}};
+        const std::allocator<int> alloc{};
+        M m(std::sorted_unique, v, alloc);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector<std::pair<int, int>> v{{2, 4}, {2, 5}, {3, 6}};
+        M m;
+        m.insert(std::sorted_unique, v.begin(), v.end());
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        const std::vector<std::pair<int, int>> v{{4, 4}, {2, 5}, {3, 6}};
+        M m;
+        m.insert(std::sorted_unique, v.begin(), v.end());
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        std::initializer_list<std::pair<int, int>> v{{2, 4}, {2, 5}, {3, 6}};
+        M m;
+        m.insert(std::sorted_unique, v);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        std::initializer_list<std::pair<int, int>> v{{4, 4}, {2, 5}, {3, 6}};
+        M m;
+        m.insert(std::sorted_unique, v);
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+
+  TEST_LIBCPP_ASSERT_FAILURE(
+      ([] {
+        std::vector keys{1, 1, 3};
+        std::vector values{4, 5, 6};
+        M m;
+        m.replace(std::move(keys), std::move(values));
+      }()),
+      "Either the key container is not sorted or it contains duplicates");
+  return 0;
+}
diff --git a/libcxx/test/libcxx/containers/containers.adaptors/flat.map/container_stability.pass.cpp b/libcxx/test/libcxx/containers/containers.adaptors/flat.map/container_stability.pass.cpp
new file mode 100644
index 000000000000000..0d90c3250061ff4
--- /dev/null
+++ b/libcxx/test/libcxx/containers/containers.adaptors/flat.map/container_stability.pass.cpp
@@ -0,0 +1,68 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map(key_container_type key_cont, mapped_container_type mapped_cont) and its comparator/allocator overloads
+//
+// libc++ uses stable_sort to ensure that flat_map's behavior matches map's,
+// in terms of which duplicate items are kept.
+// This tests a conforming extension.
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <flat_map>
+#include <random>
+#include <map>
+#include <vector>
+
+#include "test_macros.h"
+
+struct Mod256 {
+  bool operator()(int x, int y) const { return (x % 256) < (y % 256); }
+};
+
+int main(int, char**) {
+  std::mt19937 randomness;
+  std::vector<uint16_t> values;
+  std::vector<std::pair<uint16_t, uint16_t>> pairs;
+  for (int i = 0; i < 200; ++i) {
+    uint16_t r = randomness();
+    values.push_back(r);
+    pairs.emplace_back(r, r);
+  }
+
+  {
+    std::map<uint16_t, uint16_t, Mod256> m(pairs.begin(), pairs.end());
+    std::flat_map<uint16_t, uint16_t, Mod256> fm(values, values);
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+  {
+    std::map<uint16_t, uint16_t, Mod256> m(pairs.begin(), pairs.end());
+    std::flat_map<uint16_t, uint16_t, Mod256> fm(values, values, Mod256());
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+  {
+    std::map<uint16_t, uint16_t, Mod256> m(pairs.begin(), pairs.end());
+    std::flat_map<uint16_t, uint16_t, Mod256> fm(values, values, std::allocator<int>());
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+  {
+    std::map<uint16_t, uint16_t, Mod256> m(pairs.begin(), pairs.end());
+    std::flat_map<uint16_t, uint16_t, Mod256> fm(values, values, Mod256(), std::allocator<int>());
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+  return 0;
+}
diff --git a/libcxx/test/libcxx/double_include.gen.py b/libcxx/test/libcxx/double_include.gen.py
index afc2947dbece94d..f58e72f94a35335 100644
--- a/libcxx/test/libcxx/double_include.gen.py
+++ b/libcxx/test/libcxx/double_include.gen.py
@@ -15,12 +15,18 @@
 
 import sys
 sys.path.append(sys.argv[1])
-from libcxx.header_information import lit_header_restrictions, public_headers
+from libcxx.header_information import (
+    lit_header_restrictions,
+    lit_header_undeprecations,
+    public_headers,
+)
 
 for header in public_headers:
-  print(f"""\
+    print(
+        f"""\
 //--- {header}.sh.cpp
 {lit_header_restrictions.get(header, '')}
+{lit_header_undeprecations.get(header, '')}
 
 // RUN: %{{cxx}} -c %s -o %t.first.o %{{flags}} %{{compile_flags}}
 // RUN: %{{cxx}} -c %s -o %t.second.o -DWITH_MAIN %{{flags}} %{{compile_flags}}
@@ -32,4 +38,5 @@
 #if defined(WITH_MAIN)
 int main(int, char**) {{ return 0; }}
 #endif
-""")
+"""
+    )
diff --git a/libcxx/test/libcxx/header_inclusions.gen.py b/libcxx/test/libcxx/header_inclusions.gen.py
index e5def1ad4cb70d9..739caf915c09a0c 100644
--- a/libcxx/test/libcxx/header_inclusions.gen.py
+++ b/libcxx/test/libcxx/header_inclusions.gen.py
@@ -12,32 +12,43 @@
 # RUN: %{python} %s %{libcxx-dir}/utils
 
 import sys
+
 sys.path.append(sys.argv[1])
-from libcxx.header_information import lit_header_restrictions, public_headers, mandatory_inclusions
+from libcxx.header_information import (
+    lit_header_restrictions,
+    lit_header_undeprecations,
+    public_headers,
+    mandatory_inclusions,
+)
 
 for header in public_headers:
-  header_guard = lambda h: f"_LIBCPP_{str(h).upper().replace('.', '_').replace('/', '_')}"
+    header_guard = (
+        lambda h: f"_LIBCPP_{str(h).upper().replace('.', '_').replace('/', '_')}"
+    )
 
-  # <cassert> has no header guards
-  if header == 'cassert':
-    checks = ''
-  else:
-    checks = f'''
+    # <cassert> has no header guards
+    if header == "cassert":
+        checks = ""
+    else:
+        checks = f"""
 #ifndef {header_guard(header)}
 # error <{header}> was expected to define a header guard {header_guard(header)}
 #endif
-'''
-  for includee in mandatory_inclusions.get(header, []):
-    checks += f'''
+"""
+    for includee in mandatory_inclusions.get(header, []):
+        checks += f"""
 #ifndef {header_guard(includee)}
 # error <{header}> was expected to include <{includee}>
 #endif
-'''
+"""
 
-  print(f"""\
+    print(
+        f"""\
 //--- {header}.compile.pass.cpp
 {lit_header_restrictions.get(header, '')}
+{lit_header_undeprecations.get(header, '')}
 
 #include <{header}>
 {checks}
-""")
+"""
+    )
diff --git a/libcxx/test/libcxx/include_as_c.sh.cpp b/libcxx/test/libcxx/include_as_c.sh.cpp
index c9f8dfd9a5a9222..204b830462cf944 100644
--- a/libcxx/test/libcxx/include_as_c.sh.cpp
+++ b/libcxx/test/libcxx/include_as_c.sh.cpp
@@ -34,6 +34,7 @@
 #endif
 #include <math.h>
 #include <setjmp.h>
+#include <stdalign.h>
 #include <stdatomic.h>
 #include <stdbool.h>
 #include <stddef.h>
diff --git a/libcxx/test/libcxx/libcpp_version.gen.py b/libcxx/test/libcxx/libcpp_version.gen.py
index a9995295e21e4f4..b30623fe2c388bc 100644
--- a/libcxx/test/libcxx/libcpp_version.gen.py
+++ b/libcxx/test/libcxx/libcpp_version.gen.py
@@ -12,16 +12,23 @@
 
 import sys
 sys.path.append(sys.argv[1])
-from libcxx.header_information import lit_header_restrictions, public_headers
+from libcxx.header_information import (
+    lit_header_restrictions,
+    lit_header_undeprecations,
+    public_headers,
+)
 
 for header in public_headers:
-  print(f"""\
+    print(
+        f"""\
 //--- {header}.compile.pass.cpp
 {lit_header_restrictions.get(header, '')}
+{lit_header_undeprecations.get(header, '')}
 
 #include <{header}>
 
 #ifndef _LIBCPP_VERSION
 # error <{header}> does not seem to define _LIBCPP_VERSION
 #endif
-""")
+"""
+    )
diff --git a/libcxx/test/libcxx/no_assert_include.gen.py b/libcxx/test/libcxx/no_assert_include.gen.py
index 67ab98603ca8fde..e0dbc3d815f31b1 100644
--- a/libcxx/test/libcxx/no_assert_include.gen.py
+++ b/libcxx/test/libcxx/no_assert_include.gen.py
@@ -12,20 +12,28 @@
 # RUN: %{python} %s %{libcxx-dir}/utils
 
 import sys
+
 sys.path.append(sys.argv[1])
-from libcxx.header_information import lit_header_restrictions, public_headers
+from libcxx.header_information import (
+    lit_header_restrictions,
+    lit_header_undeprecations,
+    public_headers,
+)
 
 for header in public_headers:
-  if header == 'cassert':
-    continue
+    if header == "cassert":
+        continue
 
-  print(f"""\
+    print(
+        f"""\
 //--- {header}.compile.pass.cpp
 {lit_header_restrictions.get(header, '')}
+{lit_header_undeprecations.get(header, '')}
 
 #include <{header}>
 
 #ifdef assert
 # error "Do not include cassert or assert.h in standard header files"
 #endif
-""")
+"""
+    )
diff --git a/libcxx/test/libcxx/numerics/complex.number/cmplx.over.pow.pass.cpp b/libcxx/test/libcxx/numerics/complex.number/cmplx.over.pow.pass.cpp
new file mode 100644
index 000000000000000..1c790c283e43876
--- /dev/null
+++ b/libcxx/test/libcxx/numerics/complex.number/cmplx.over.pow.pass.cpp
@@ -0,0 +1,84 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <complex>
+
+//  template<class T, class U> complex<__promote<T, U>::type> pow(const complex<T>&, const U&);
+//  template<class T, class U> complex<__promote<T, U>::type> pow(const complex<T>&, const complex<U>&);
+//  template<class T, class U> complex<__promote<T, U>::type> pow(const T&, const complex<U>&);
+
+// Test that these additional overloads do not capture std::complex<non-floating-point> arguments,
+// which several 3rd party libraries rely on, see https://github.com/llvm/llvm-project/issues/109858.
+//
+// Note that we reserve the right to break this in the future if we have a reason to, but for the time being,
+// make sure we don't break this property unintentionally.
+#include <cassert>
+#include <cmath>
+#include <complex>
+#include <type_traits>
+
+#include "test_macros.h"
+
+namespace usr { // user-side namespace providing its own pow overloads
+struct usr_tag {}; // empty user-defined type (not a floating-point type)
+
+template <class T, class U>
+typename std::enable_if<(std::is_same<T, usr_tag>::value && std::is_floating_point<U>::value) ||
+                            (std::is_floating_point<T>::value && std::is_same<U, usr_tag>::value),
+                        int>::type // enabled only for one usr_tag paired with one floating-point type
+pow(const T&, const std::complex<U>&) { // (scalar, complex)
+  return std::is_same<T, usr_tag>::value ? 0 : 1; // 0: usr_tag scalar; 1: complex<usr_tag>
+}
+
+template <class T, class U>
+typename std::enable_if<(std::is_same<T, usr_tag>::value && std::is_floating_point<U>::value) ||
+                            (std::is_floating_point<T>::value && std::is_same<U, usr_tag>::value),
+                        int>::type // same constraint as the overload above
+pow(const std::complex<T>&, const U&) { // (complex, scalar)
+  return std::is_same<U, usr_tag>::value ? 2 : 3; // 2: usr_tag scalar; 3: complex<usr_tag>
+}
+
+template <class T, class U>
+typename std::enable_if<(std::is_same<T, usr_tag>::value && std::is_floating_point<U>::value) ||
+                            (std::is_floating_point<T>::value && std::is_same<U, usr_tag>::value),
+                        int>::type // same constraint as the overloads above
+pow(const std::complex<T>&, const std::complex<U>&) { // (complex, complex)
+  return std::is_same<T, usr_tag>::value ? 4 : 5; // 4: first is complex<usr_tag>; 5: second is
+}
+} // namespace usr
+
+int main(int, char**) {
+  using usr::pow;
+  using std::pow;
+
+  usr::usr_tag scalar;
+  const std::complex<usr::usr_tag> wrapped;
+
+  assert(pow(scalar, std::complex<float>(1.0f)) == 0); // usr_tag scalar with a floating-point complex
+  assert(pow(std::complex<float>(1.0f), scalar) == 2);
+  assert(pow(scalar, std::complex<double>(1.0)) == 0);
+  assert(pow(std::complex<double>(1.0), scalar) == 2);
+  assert(pow(scalar, std::complex<long double>(1.0l)) == 0);
+  assert(pow(std::complex<long double>(1.0l), scalar) == 2);
+
+  assert(pow(1.0f, wrapped) == 1); // floating-point scalar with complex<usr_tag>
+  assert(pow(wrapped, 1.0f) == 3);
+  assert(pow(1.0, wrapped) == 1);
+  assert(pow(wrapped, 1.0) == 3);
+  assert(pow(1.0l, wrapped) == 1);
+  assert(pow(wrapped, 1.0l) == 3);
+
+  assert(pow(wrapped, std::complex<float>(1.0f)) == 4); // complex<usr_tag> with a floating-point complex
+  assert(pow(std::complex<float>(1.0f), wrapped) == 5);
+  assert(pow(wrapped, std::complex<double>(1.0)) == 4);
+  assert(pow(std::complex<double>(1.0), wrapped) == 5);
+  assert(pow(wrapped, std::complex<long double>(1.0l)) == 4);
+  assert(pow(std::complex<long double>(1.0l), wrapped) == 5);
+
+  return 0;
+}
diff --git a/libcxx/test/libcxx/system_reserved_names.gen.py b/libcxx/test/libcxx/system_reserved_names.gen.py
index e29e7a2cdd61449..f01126249c88171 100644
--- a/libcxx/test/libcxx/system_reserved_names.gen.py
+++ b/libcxx/test/libcxx/system_reserved_names.gen.py
@@ -13,14 +13,20 @@
 # RUN: %{python} %s %{libcxx-dir}/utils
 
 import sys
+
 sys.path.append(sys.argv[1])
-from libcxx.header_information import lit_header_restrictions, public_headers
+from libcxx.header_information import (
+    lit_header_restrictions,
+    lit_header_undeprecations,
+    public_headers,
+)
 
 for header in public_headers:
     print(
         f"""\
 //--- {header}.compile.pass.cpp
 {lit_header_restrictions.get(header, '')}
+{lit_header_undeprecations.get(header, '')}
 
 #define SYSTEM_RESERVED_NAME This name should not be used in libc++
 
diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv
index 506b5cd02c4495e..48c501863cb76ca 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx03.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv
@@ -458,7 +458,6 @@ ctgmath array
 ctgmath atomic
 ctgmath bit
 ctgmath bitset
-ctgmath ccomplex
 ctgmath cctype
 ctgmath cerrno
 ctgmath climits
@@ -682,6 +681,50 @@ filesystem typeinfo
 filesystem utility
 filesystem variant
 filesystem version
+flat_map algorithm
+flat_map array
+flat_map atomic
+flat_map bit
+flat_map cctype
+flat_map cerrno
+flat_map climits
+flat_map clocale
+flat_map cmath
+flat_map compare
+flat_map concepts
+flat_map cstdarg
+flat_map cstddef
+flat_map cstdint
+flat_map cstdio
+flat_map cstdlib
+flat_map cstring
+flat_map ctime
+flat_map cwchar
+flat_map cwctype
+flat_map exception
+flat_map initializer_list
+flat_map ios
+flat_map iosfwd
+flat_map iterator
+flat_map limits
+flat_map locale
+flat_map memory
+flat_map mutex
+flat_map new
+flat_map optional
+flat_map ratio
+flat_map stdexcept
+flat_map streambuf
+flat_map string
+flat_map string_view
+flat_map system_error
+flat_map tuple
+flat_map type_traits
+flat_map typeinfo
+flat_map utility
+flat_map variant
+flat_map vector
+flat_map version
 format algorithm
 format array
 format atomic
diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv
index 506b5cd02c4495e..48c501863cb76ca 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx11.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv
@@ -458,7 +458,6 @@ ctgmath array
 ctgmath atomic
 ctgmath bit
 ctgmath bitset
-ctgmath ccomplex
 ctgmath cctype
 ctgmath cerrno
 ctgmath climits
@@ -682,6 +681,50 @@ filesystem typeinfo
 filesystem utility
 filesystem variant
 filesystem version
+flat_map algorithm
+flat_map array
+flat_map atomic
+flat_map bit
+flat_map cctype
+flat_map cerrno
+flat_map climits
+flat_map clocale
+flat_map cmath
+flat_map compare
+flat_map concepts
+flat_map cstdarg
+flat_map cstddef
+flat_map cstdint
+flat_map cstdio
+flat_map cstdlib
+flat_map cstring
+flat_map ctime
+flat_map cwchar
+flat_map cwctype
+flat_map exception
+flat_map initializer_list
+flat_map ios
+flat_map iosfwd
+flat_map iterator
+flat_map limits
+flat_map locale
+flat_map memory
+flat_map mutex
+flat_map new
+flat_map optional
+flat_map ratio
+flat_map stdexcept
+flat_map streambuf
+flat_map string
+flat_map string_view
+flat_map system_error
+flat_map tuple
+flat_map type_traits
+flat_map typeinfo
+flat_map utility
+flat_map variant
+flat_map vector
+flat_map version
 format algorithm
 format array
 format atomic
diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv
index 828e1d62c6ec3e8..6191c9012c631b4 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx14.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv
@@ -467,7 +467,6 @@ ctgmath array
 ctgmath atomic
 ctgmath bit
 ctgmath bitset
-ctgmath ccomplex
 ctgmath cctype
 ctgmath cerrno
 ctgmath climits
@@ -700,6 +699,51 @@ filesystem typeinfo
 filesystem utility
 filesystem variant
 filesystem version
+flat_map algorithm
+flat_map array
+flat_map atomic
+flat_map bit
+flat_map cctype
+flat_map cerrno
+flat_map climits
+flat_map clocale
+flat_map cmath
+flat_map compare
+flat_map concepts
+flat_map cstdarg
+flat_map cstddef
+flat_map cstdint
+flat_map cstdio
+flat_map cstdlib
+flat_map cstring
+flat_map ctime
+flat_map cwchar
+flat_map cwctype
+flat_map exception
+flat_map execution
+flat_map initializer_list
+flat_map ios
+flat_map iosfwd
+flat_map iterator
+flat_map limits
+flat_map locale
+flat_map memory
+flat_map mutex
+flat_map new
+flat_map optional
+flat_map ratio
+flat_map stdexcept
+flat_map streambuf
+flat_map string
+flat_map string_view
+flat_map system_error
+flat_map tuple
+flat_map type_traits
+flat_map typeinfo
+flat_map utility
+flat_map variant
+flat_map vector
+flat_map version
 format algorithm
 format array
 format atomic
diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv
index 0bee6e9beb7af13..5d46162e3f8996d 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx17.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv
@@ -458,7 +458,6 @@ ctgmath array
 ctgmath atomic
 ctgmath bit
 ctgmath bitset
-ctgmath ccomplex
 ctgmath cctype
 ctgmath cerrno
 ctgmath climits
@@ -706,6 +705,50 @@ filesystem utility
 filesystem variant
 filesystem vector
 filesystem version
+flat_map algorithm
+flat_map array
+flat_map atomic
+flat_map bit
+flat_map cctype
+flat_map cerrno
+flat_map climits
+flat_map clocale
+flat_map cmath
+flat_map compare
+flat_map concepts
+flat_map cstdarg
+flat_map cstddef
+flat_map cstdint
+flat_map cstdio
+flat_map cstdlib
+flat_map cstring
+flat_map ctime
+flat_map cwchar
+flat_map cwctype
+flat_map exception
+flat_map initializer_list
+flat_map ios
+flat_map iosfwd
+flat_map iterator
+flat_map limits
+flat_map locale
+flat_map memory
+flat_map mutex
+flat_map new
+flat_map optional
+flat_map ratio
+flat_map stdexcept
+flat_map streambuf
+flat_map string
+flat_map string_view
+flat_map system_error
+flat_map tuple
+flat_map type_traits
+flat_map typeinfo
+flat_map utility
+flat_map variant
+flat_map vector
+flat_map version
 format algorithm
 format array
 format atomic
diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv
index 026c26f3bd98198..20fe9878ce3eae0 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx20.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv
@@ -446,7 +446,6 @@ ctgmath array
 ctgmath atomic
 ctgmath bit
 ctgmath bitset
-ctgmath ccomplex
 ctgmath cctype
 ctgmath cerrno
 ctgmath climits
@@ -694,6 +693,50 @@ filesystem utility
 filesystem variant
 filesystem vector
 filesystem version
+flat_map algorithm
+flat_map array
+flat_map atomic
+flat_map bit
+flat_map cctype
+flat_map cerrno
+flat_map climits
+flat_map clocale
+flat_map cmath
+flat_map compare
+flat_map concepts
+flat_map cstdarg
+flat_map cstddef
+flat_map cstdint
+flat_map cstdio
+flat_map cstdlib
+flat_map cstring
+flat_map ctime
+flat_map cwchar
+flat_map cwctype
+flat_map exception
+flat_map initializer_list
+flat_map ios
+flat_map iosfwd
+flat_map iterator
+flat_map limits
+flat_map locale
+flat_map memory
+flat_map mutex
+flat_map new
+flat_map optional
+flat_map ratio
+flat_map stdexcept
+flat_map streambuf
+flat_map string
+flat_map string_view
+flat_map system_error
+flat_map tuple
+flat_map type_traits
+flat_map typeinfo
+flat_map utility
+flat_map variant
+flat_map vector
+flat_map version
 format algorithm
 format array
 format atomic
diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv
index c3db06f15477693..5ee89ec307cc296 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx23.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv
@@ -238,7 +238,6 @@ coroutine limits
 coroutine version
 cstddef version
 ctgmath bitset
-ctgmath ccomplex
 ctgmath cctype
 ctgmath cerrno
 ctgmath climits
@@ -381,6 +380,31 @@ filesystem string_view
 filesystem tuple
 filesystem typeinfo
 filesystem version
+flat_map array
+flat_map cctype
+flat_map cerrno
+flat_map climits
+flat_map clocale
+flat_map compare
+flat_map cstddef
+flat_map cstdint
+flat_map cstdio
+flat_map cstdlib
+flat_map cstring
+flat_map cwchar
+flat_map cwctype
+flat_map initializer_list
+flat_map iosfwd
+flat_map limits
+flat_map new
+flat_map optional
+flat_map stdexcept
+flat_map string
+flat_map string_view
+flat_map tuple
+flat_map typeinfo
+flat_map vector
+flat_map version
 format array
 format cctype
 format cerrno
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index 8d7560344ee541c..ee17223e66bee4c 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -238,7 +238,6 @@ coroutine limits
 coroutine version
 cstddef version
 ctgmath bitset
-ctgmath ccomplex
 ctgmath cctype
 ctgmath cerrno
 ctgmath climits
@@ -381,6 +380,31 @@ filesystem string_view
 filesystem tuple
 filesystem typeinfo
 filesystem version
+flat_map array
+flat_map cctype
+flat_map cerrno
+flat_map climits
+flat_map clocale
+flat_map compare
+flat_map cstddef
+flat_map cstdint
+flat_map cstdio
+flat_map cstdlib
+flat_map cstring
+flat_map cwchar
+flat_map cwctype
+flat_map initializer_list
+flat_map iosfwd
+flat_map limits
+flat_map new
+flat_map optional
+flat_map stdexcept
+flat_map string
+flat_map string_view
+flat_map tuple
+flat_map typeinfo
+flat_map vector
+flat_map version
 format array
 format cctype
 format cerrno
diff --git a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp
index 5130758d5efd52d..abb12d6a3c24730 100644
--- a/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp
+++ b/libcxx/test/std/atomics/atomics.lockfree/is_always_lock_free.pass.cpp
@@ -18,7 +18,7 @@
 
 // Ignore diagnostic about vector types changing the ABI on some targets, since
 // that is irrelevant for this test.
-// ADDITIONAL_COMPILE_FLAGS: -Wno-psabi
+// ADDITIONAL_COMPILE_FLAGS(gcc-style-warnings): -Wno-psabi
 
 #include <atomic>
 #include <cassert>
diff --git a/libcxx/test/std/containers/container.adaptors/NaiveStaticVector.h b/libcxx/test/std/containers/container.adaptors/NaiveStaticVector.h
new file mode 100644
index 000000000000000..61fa3504e34e3a2
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/NaiveStaticVector.h
@@ -0,0 +1,109 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUPPORT_NAIVE_STATIC_VECTOR_H
+#define SUPPORT_NAIVE_STATIC_VECTOR_H
+
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+#include <utility>
+#include "test_iterators.h"
+#include "test_macros.h"
+
+// A minimal fixed-capacity sequence container for tests. Elements live in an in-object array of
+// N T objects; inserting into a full container throws CapacityError.
+template <class T, std::size_t N>
+struct NaiveStaticVector {
+  struct CapacityError {};
+
+  using value_type      = T;
+  using difference_type = short;
+  using size_type       = unsigned short;
+  using iterator        = random_access_iterator<T*>;
+  using const_iterator  = random_access_iterator<const T*>;
+
+  explicit NaiveStaticVector() = default;
+
+  // Appends the elements of [first, last) in order. Throws CapacityError if the range holds more
+  // than N elements.
+  template <class It>
+  explicit NaiveStaticVector(It first, It last) {
+    while (first != last)
+      insert(cbegin() + size(), *first++); // there is no single-argument insert; append at the end
+  }
+
+  // Moving-from a NaiveStaticVector leaves the source vector holding moved-from objects.
+  // This is intentional (the "Naive" in the name).
+  // Specifically, moving-out-of a sorted+uniqued NaiveStaticVector<MoveOnly>
+  // will leave it in a non-sorted+uniqued state.
+
+  NaiveStaticVector(const NaiveStaticVector&)            = default;
+  NaiveStaticVector(NaiveStaticVector&&)                 = default; // deliberately don't reset size_
+  NaiveStaticVector& operator=(const NaiveStaticVector&) = default;
+  NaiveStaticVector& operator=(NaiveStaticVector&&)      = default;
+
+  iterator begin() { return iterator(data_); }
+  const_iterator begin() const { return const_iterator(data_); }
+  const_iterator cbegin() const { return const_iterator(data_); }
+  iterator end() { return begin() + size(); }
+  const_iterator end() const { return begin() + size(); }
+  size_type size() const { return size_; }
+  bool empty() const { return size_ == 0; }
+
+  // Only resets the element count; the stored objects are neither destroyed nor reassigned.
+  void clear() { size_ = 0; }
+
+  // Inserts the elements of [first, last) before pos, one at a time. Returns an iterator to the
+  // first inserted element, or to pos if the range is empty.
+  template <class It>
+  iterator insert(const_iterator pos, It first, It last) {
+    iterator result = pos - cbegin() + begin();
+    while (first != last) {
+      insert(pos++, *first++);
+    }
+    return result;
+  }
+
+  // Inserts value before pos and returns an iterator to it. Throws CapacityError, leaving the
+  // container unchanged, if it already holds N elements.
+  iterator insert(const_iterator pos, T value) {
+    if (size_ == N) {
+      throw CapacityError();
+    }
+    int i = pos - cbegin();
+    size_ += 1;
+    std::move_backward(&data_[i], &data_[size_ - 1], &data_[size_]); // shift [pos, old end) right by one
+    data_[i] = std::move(value);
+    return begin() + i;
+  }
+
+  // Builds a T from args and inserts it before pos.
+  template <class... Args>
+  iterator emplace(const_iterator pos, Args&&... args) {
+    return insert(pos, T(std::forward<Args>(args)...));
+  }
+
+  // Removes [first, last) by move-assigning the tail down over it. Slots past the new end keep
+  // their moved-from objects. Returns an iterator to the element that followed the erased range.
+  iterator erase(const_iterator first, const_iterator last) {
+    int i = first - cbegin();
+    int j = last - cbegin();
+    std::move(&data_[j], &data_[size_], &data_[i]);
+    size_ -= (last - first);
+    return begin() + i;
+  }
+
+  iterator erase(const_iterator pos) { return erase(pos, std::next(pos)); }
+
+private:
+  T data_[N];            // storage for all N slots; only the first size_ hold live elements
+  std::size_t size_ = 0; // number of elements currently held
+};
+
+#endif // SUPPORT_NAIVE_STATIC_VECTOR_H
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map.syn/sorted_unique.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map.syn/sorted_unique.pass.cpp
new file mode 100644
index 000000000000000..c602d2d3d38f794
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map.syn/sorted_unique.pass.cpp
@@ -0,0 +1,44 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// struct sorted_unique_t { explicit sorted_unique_t() = default; };
+// inline constexpr sorted_unique_t sorted_unique{};
+
+#include <cassert>
+#include <concepts>
+#include <flat_map>
+#include <type_traits>
+
+template <class T>
+void implicit_test(T) {} // by-value parameter: a `{}` argument copy-list-initializes a T
+
+template <class T>
+concept HasImplicitDefaultCtor = requires { implicit_test<T>({}); }; // satisfied only if `T t = {}` is well-formed
+
+static_assert(std::is_default_constructible_v<std::sorted_unique_t>);
+static_assert(std::is_trivially_default_constructible_v<std::sorted_unique_t>);
+static_assert(!HasImplicitDefaultCtor<std::sorted_unique_t>); // the default constructor is explicit
+
+constexpr bool test() {
+  { [[maybe_unused]] std::sorted_unique_t tag; } // default-initializable
+  { [[maybe_unused]] std::same_as<const std::sorted_unique_t&> decltype(auto) ref = (std::sorted_unique); }
+  { [[maybe_unused]] std::same_as<const std::sorted_unique_t> decltype(auto) value = std::sorted_unique; }
+
+  return true;
+}
+
+int main(int, char**) {
+  static_assert(test()); // constant-evaluated check
+  test();                // run-time check
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at.pass.cpp
new file mode 100644
index 000000000000000..d30055bf1701cdf
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at.pass.cpp
@@ -0,0 +1,92 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+//       mapped_type& at(const key_type& k);
+// const mapped_type& at(const key_type& k) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <stdexcept>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "min_allocator.h"
+#include "test_macros.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Entry   = std::pair<int, double>;
+  Entry input[] = {
+      Entry(1, 1.5),
+      Entry(2, 2.5),
+      Entry(3, 3.5),
+      Entry(4, 4.5),
+      Entry(5, 5.5),
+      Entry(7, 7.5),
+      Entry(8, 8.5),
+  };
+  const int key_one = 1;
+  { // non-const map: at() yields double& and allows assignment
+    std::flat_map<int, double, std::less<int>, KeyContainer, ValueContainer> fm(std::begin(input), std::end(input));
+    ASSERT_SAME_TYPE(decltype(fm.at(key_one)), double&);
+    assert(fm.size() == 7);
+    assert(fm.at(key_one) == 1.5);
+    fm.at(1) = -1.5;
+    assert(fm.at(1) == -1.5);
+    assert(fm.at(2) == 2.5);
+    assert(fm.at(3) == 3.5);
+    assert(fm.at(4) == 4.5);
+    assert(fm.at(5) == 5.5);
+#ifndef TEST_HAS_NO_EXCEPTIONS
+    try {
+      TEST_IGNORE_NODISCARD fm.at(6); // key 6 is not in the input
+      assert(false);
+    } catch (std::out_of_range&) {
+    }
+#endif
+    assert(fm.at(7) == 7.5);
+    assert(fm.at(8) == 8.5);
+    assert(fm.size() == 7); // at() never inserts
+  }
+  { // const map: at() yields const double&
+    const std::flat_map<int, double, std::less<int>, KeyContainer, ValueContainer> cm(
+        std::begin(input), std::end(input));
+    ASSERT_SAME_TYPE(decltype(cm.at(key_one)), const double&);
+    assert(cm.size() == 7);
+    assert(cm.at(key_one) == 1.5);
+    assert(cm.at(2) == 2.5);
+    assert(cm.at(3) == 3.5);
+    assert(cm.at(4) == 4.5);
+    assert(cm.at(5) == 5.5);
+#ifndef TEST_HAS_NO_EXCEPTIONS
+    try {
+      TEST_IGNORE_NODISCARD cm.at(6); // key 6 is not in the input
+      assert(false);
+    } catch (std::out_of_range&) {
+    }
+#endif
+    assert(cm.at(7) == 7.5);
+    assert(cm.at(8) == 8.5);
+    assert(cm.size() == 7);
+  }
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>(); // key and mapped containers of different kinds
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>(); // non-default allocator
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at_transparent.pass.cpp
new file mode 100644
index 000000000000000..13edca915fd005c
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/at_transparent.pass.cpp
@@ -0,0 +1,112 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class K> mapped_type&       at(const K& x);
+// template<class K> const mapped_type& at(const K& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <stdexcept>
+#include <vector>
+
+#include "../helpers.h"
+#include "min_allocator.h"
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+
+// Constraint under test: at(const K&) is usable only when Compare::is_transparent is valid and denotes a type.
+template <class Map>
+concept CanAt           = requires(Map map, Transparent<int> key) { map.at(key); };
+using TransparentMap    = std::flat_map<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_map<int, double, NonTransparentComparator>;
+static_assert(!CanAt<NonTransparentMap>);
+static_assert(!CanAt<const NonTransparentMap>);
+static_assert(CanAt<TransparentMap>);
+static_assert(CanAt<const TransparentMap>);
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Entry   = std::pair<int, double>;
+  Entry input[] = {
+      Entry(1, 1.5),
+      Entry(2, 2.5),
+      Entry(3, 3.5),
+      Entry(4, 4.5),
+      Entry(5, 5.5),
+      Entry(7, 7.5),
+      Entry(8, 8.5),
+  };
+  const Transparent<int> key_one{1};
+  { // non-const map: at() yields double& and allows assignment
+    std::flat_map<int, double, TransparentComparator, KeyContainer, ValueContainer> fm(
+        std::begin(input), std::end(input));
+    ASSERT_SAME_TYPE(decltype(fm.at(key_one)), double&);
+    assert(fm.size() == 7);
+    assert(fm.at(key_one) == 1.5);
+    fm.at(key_one) = -1.5;
+    assert(fm.at(Transparent<int>{1}) == -1.5);
+    assert(fm.at(Transparent<int>{2}) == 2.5);
+    assert(fm.at(Transparent<int>{3}) == 3.5);
+    assert(fm.at(Transparent<int>{4}) == 4.5);
+    assert(fm.at(Transparent<int>{5}) == 5.5);
+#ifndef TEST_HAS_NO_EXCEPTIONS
+    try {
+      TEST_IGNORE_NODISCARD fm.at(Transparent<int>{6}); // key 6 is not in the input
+      assert(false);
+    } catch (std::out_of_range&) {
+    }
+#endif
+    assert(fm.at(Transparent<int>{7}) == 7.5);
+    assert(fm.at(Transparent<int>{8}) == 8.5);
+    assert(fm.size() == 7); // at() never inserts
+  }
+  { // const map: at() yields const double&
+    const std::flat_map<int, double, TransparentComparator, KeyContainer, ValueContainer> cm(
+        std::begin(input), std::end(input));
+    ASSERT_SAME_TYPE(decltype(cm.at(key_one)), const double&);
+    assert(cm.size() == 7);
+    assert(cm.at(Transparent<int>{1}) == 1.5);
+    assert(cm.at(Transparent<int>{2}) == 2.5);
+    assert(cm.at(Transparent<int>{3}) == 3.5);
+    assert(cm.at(Transparent<int>{4}) == 4.5);
+    assert(cm.at(Transparent<int>{5}) == 5.5);
+#ifndef TEST_HAS_NO_EXCEPTIONS
+    try {
+      TEST_IGNORE_NODISCARD cm.at(Transparent<int>{6}); // key 6 is not in the input
+      assert(false);
+    } catch (std::out_of_range&) {
+    }
+#endif
+    assert(cm.at(Transparent<int>{7}) == 7.5);
+    assert(cm.at(Transparent<int>{8}) == 8.5);
+    assert(cm.size() == 7);
+  }
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+  { // the comparator's flag must flip only once the heterogeneous at() runs
+    bool transparent_used = false; // handed to the comparator below and inspected around the lookup
+    TransparentComparator c(transparent_used);
+    std::flat_map<int, int, TransparentComparator> m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c);
+    assert(!transparent_used); // constructing the map must not have set the flag
+    TEST_IGNORE_NODISCARD m.at(Transparent<int>{3}); // result unused on purpose, as in test() above
+    assert(transparent_used);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_key.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_key.pass.cpp
new file mode 100644
index 000000000000000..ea2f5d800878a23
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_key.pass.cpp
@@ -0,0 +1,77 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// mapped_type& operator[](const key_type& k);
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "min_allocator.h"
+#include "test_macros.h"
+
+// Constraints: is_constructible_v<mapped_type> is true.
+template <class M, class Input>
+concept CanIndex = requires(M m, Input k) { m[k]; }; // satisfied when `m[k]` is a well-formed expression
+
+static_assert(CanIndex<std::flat_map<int, double>, const int&>); // double mapped_type: indexing is available
+static_assert(!CanIndex<std::flat_map<int, NoDefaultCtr>, const int&>); // NoDefaultCtr mapped_type: indexing must be rejected
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks operator[] lookup, assignment and insertion on a flat_map<int, double> over the given containers.
+  using P = std::pair<int, double>;
+  P ar[]  = { // seven entries; key 6 is intentionally absent
+      P(1, 1.5),
+      P(2, 2.5),
+      P(3, 3.5),
+      P(4, 4.5),
+      P(5, 5.5),
+      P(7, 7.5),
+      P(8, 8.5),
+  };
+  const int one = 1; // const lvalue key used for the type check and first lookup
+  std::flat_map<int, double, std::less<int>, KeyContainer, ValueContainer> m(ar, ar + sizeof(ar) / sizeof(ar[0]));
+  ASSERT_SAME_TYPE(decltype(m[one]), double&); // operator[] yields a mutable reference to the mapped value
+  assert(m.size() == 7);
+  assert(m[one] == 1.5);
+  assert(m.size() == 7); // indexing an existing key does not insert
+  m[1] = -1.5;           // assignment through the returned reference
+  assert(m[1] == -1.5);
+  assert(m.size() == 7);
+  assert(m[6] == 0);     // missing key: a new element comparing equal to 0 is inserted
+  assert(m.size() == 8); // ... and the size grows by one
+  m[6] = 6.5;
+  assert(m[6] == 6.5);
+  assert(m.size() == 8);
+}
+
+int main(int, char**) { // Runs test() for each container pairing, then the shared exception-guarantee helper.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto index_func = [](auto& m, auto key_arg, auto value_arg) { // performs `m[key] = value` with a const lvalue key
+      using FlatMap                             = std::decay_t<decltype(m)>;
+      const typename FlatMap::key_type key      = key_arg;
+      const typename FlatMap::mapped_type value = value_arg;
+      m[key]                                    = value;
+    };
+    test_emplace_exception_guarantee(index_func); // helper defined outside this file (presumably ../helpers.h) - TODO confirm
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_rv_key.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_rv_key.pass.cpp
new file mode 100644
index 000000000000000..faacc3cfe8f96fa
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_rv_key.pass.cpp
@@ -0,0 +1,68 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// mapped_type& operator[](key_type&& k);
+
+#include <flat_map>
+#include <deque>
+#include <functional>
+#include <cassert>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "MoveOnly.h"
+#include "min_allocator.h"
+
+// Constraints: is_constructible_v<mapped_type> is true.
+template <class M, class Input>
+concept CanIndex = requires(M m, Input k) { m[k]; }; // satisfied when `m[k]` is a well-formed expression
+
+static_assert(CanIndex<std::flat_map<int, double>, int&&>); // double mapped_type: indexing is available
+static_assert(!CanIndex<std::flat_map<int, NoDefaultCtr>, int&&>); // NoDefaultCtr mapped_type: indexing must be rejected
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks operator[] with MoveOnly keys on an initially empty flat_map.
+  {
+    std::flat_map<MoveOnly, double, std::less<MoveOnly>, KeyContainer, ValueContainer> m;
+    ASSERT_SAME_TYPE(decltype(m[MoveOnly{}]), double&); // rvalue key still yields a mutable reference
+    assert(m.size() == 0);
+    assert(m[1] == 0.0);   // missing key: a new element comparing equal to 0.0 is inserted
+    assert(m.size() == 1);
+    m[1] = -1.5;           // existing key: assignment through the returned reference
+    assert(m[1] == -1.5);
+    assert(m.size() == 1); // no second element was created for key 1
+    assert(m[6] == 0);
+    assert(m.size() == 2);
+    m[6] = 6.5;
+    assert(m[6] == 6.5);
+    assert(m.size() == 2);
+  }
+}
+
+int main(int, char**) { // Runs test() for each container pairing, then the shared exception-guarantee helper.
+  test<std::vector<MoveOnly>, std::vector<double>>();
+  test<std::deque<MoveOnly>, std::vector<double>>();
+  test<MinSequenceContainer<MoveOnly>, MinSequenceContainer<double>>();
+  test<std::vector<MoveOnly, min_allocator<MoveOnly>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto index_func = [](auto& m, auto key_arg, auto value_arg) { // performs `m[std::move(key)] = value`
+      using FlatMap                             = std::decay_t<decltype(m)>;
+      typename FlatMap::key_type key            = key_arg; // non-const so it can be moved from below
+      const typename FlatMap::mapped_type value = value_arg;
+      m[std::move(key)]                         = value;
+    };
+    test_emplace_exception_guarantee(index_func); // helper defined outside this file (presumably ../helpers.h) - TODO confirm
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_transparent.pass.cpp
new file mode 100644
index 000000000000000..24c08464f3158c4
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.access/index_transparent.pass.cpp
@@ -0,0 +1,107 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class K> mapped_type& operator[](K&& x);
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints:
+// The qualified-id Compare::is_transparent is valid and denotes a type.
+// is_constructible_v<key_type, K> is true.
+// is_constructible_v<mapped_type, Args...> is true.
+// is_convertible_v<K&&, const_iterator> and is_convertible_v<K&&, iterator> are both false
+template <class M, class Input>
+concept CanIndex                      = requires(M m, Input k) { m[k]; }; // satisfied when `m[k]` is well-formed
+using TransparentMap                  = std::flat_map<int, double, TransparentComparator>;
+using NonTransparentMap               = std::flat_map<int, double, NonTransparentComparator>;
+using TransparentNoDefaultCtrValueMap = std::flat_map<int, NoDefaultCtr, TransparentComparator>;
+
+static_assert(CanIndex<TransparentMap, ConvertibleTransparent<int>>); // the one accepted combination in this list
+static_assert(!CanIndex<const TransparentMap, ConvertibleTransparent<int>>); // const map cannot be indexed
+
+static_assert(!CanIndex<NonTransparentMap, NonConvertibleTransparent<int>>); // NonTransparentComparator map is rejected
+static_assert(!CanIndex<const NonTransparentMap, NonConvertibleTransparent<int>>);
+
+static_assert(!CanIndex<TransparentMap, NonConvertibleTransparent<int>>); // NonConvertibleTransparent key is rejected
+static_assert(!CanIndex<const TransparentMap, NonConvertibleTransparent<int>>);
+
+static_assert(!CanIndex<TransparentNoDefaultCtrValueMap, ConvertibleTransparent<int>>); // NoDefaultCtr mapped_type is rejected
+static_assert(!CanIndex<const TransparentNoDefaultCtrValueMap, ConvertibleTransparent<int>>);
+
+static_assert(!CanIndex<TransparentMap, TransparentMap::iterator>); // iterator arguments are rejected
+static_assert(!CanIndex<TransparentMap, TransparentMap::const_iterator>);
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks operator[] called with ConvertibleTransparent<int> keys on a TransparentComparator map.
+  using P = std::pair<int, double>;
+  P ar[]  = { // seven entries; key 6 is intentionally absent
+      P(1, 1.5),
+      P(2, 2.5),
+      P(3, 3.5),
+      P(4, 4.5),
+      P(5, 5.5),
+      P(7, 7.5),
+      P(8, 8.5),
+  };
+  const ConvertibleTransparent<int> one{1}; // key present in `ar`
+  const ConvertibleTransparent<int> six{6}; // key missing from `ar`
+  {
+    std::flat_map<int, double, TransparentComparator, KeyContainer, ValueContainer> m(
+        ar, ar + sizeof(ar) / sizeof(ar[0]));
+    ASSERT_SAME_TYPE(decltype(m[one]), double&); // yields a mutable reference to the mapped value
+    assert(m.size() == 7);
+    assert(m[one] == 1.5);
+    assert(m.size() == 7); // indexing an existing key does not insert
+    m[one] = -1.5;
+    assert(m[one] == -1.5);
+    assert(m.size() == 7);
+    assert(m[six] == 0);   // missing key: a new element comparing equal to 0 is inserted
+    assert(m.size() == 8);
+    m[six] = 6.5;
+    assert(m[six] == 6.5);
+    assert(m.size() == 8);
+  }
+}
+
+int main(int, char**) { // Runs test() per container pairing, then comparator-usage and exception-guarantee checks.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+  { // The flag handed to the comparator must stay false after construction and be true after indexing.
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used);
+    std::flat_map<int, int, TransparentComparator> m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c);
+    assert(!transparent_used);
+    m[ConvertibleTransparent<int>{3}]; // result intentionally unused; only the flag change matters
+    assert(transparent_used);
+  }
+  {
+    auto index_func = [](auto& m, auto key_arg, auto value_arg) { // performs `m[ConvertibleTransparent<Key>{...}] = value`
+      using FlatMap                             = std::decay_t<decltype(m)>;
+      using Key                                 = typename FlatMap::key_type;
+      const typename FlatMap::mapped_type value = value_arg;
+      m[ConvertibleTransparent<Key>{key_arg}]   = value;
+    };
+    test_emplace_exception_guarantee(index_func); // helper defined outside this file (presumably ../helpers.h) - TODO confirm
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.pass.cpp
new file mode 100644
index 000000000000000..5ecc2cf7c917bd2
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.pass.cpp
@@ -0,0 +1,47 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// [[nodiscard]] bool empty() const noexcept;
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks empty() on a default-constructed, a populated and a cleared flat_map.
+  using M = std::flat_map<int, double, std::less<int>, KeyContainer, ValueContainer>;
+  M m;
+  ASSERT_SAME_TYPE(decltype(m.empty()), bool);
+  ASSERT_NOEXCEPT(m.empty());
+  assert(m.empty());                // default-constructed map is empty
+  assert(std::as_const(m).empty()); // also callable through a const reference
+  m = {{1, 1.0}};
+  assert(!m.empty());
+  m.clear();
+  assert(m.empty());
+}
+
+int main(int, char**) { // Runs test() once for each key/value container pairing.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.verify.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.verify.cpp
new file mode 100644
index 000000000000000..cc8016182dcb664
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/empty.verify.cpp
@@ -0,0 +1,24 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// [[nodiscard]] bool empty() const noexcept;
+
+#include <flat_map>
+
+#include "test_macros.h"
+
+int main(int, char**) { // Diagnostic test: discarding the result of empty() must produce the warning named below.
+  std::flat_map<int, int> c;
+  c.empty(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}}
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/max_size.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/max_size.pass.cpp
new file mode 100644
index 000000000000000..87acdfd2cf6250a
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/max_size.pass.cpp
@@ -0,0 +1,76 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// size_type max_size() const noexcept;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <limits>
+#include <type_traits>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_allocator.h"
+#include "test_macros.h"
+
+int main(int, char**) { // Checks max_size() against the limits of the underlying containers' allocators.
+  { // key container limited to 10, mapped container limited to 20
+    using A1 = limited_allocator<int, 10>;
+    using A2 = limited_allocator<int, 20>;
+    using C  = std::flat_map<int, int, std::less<int>, std::vector<int, A1>, std::vector<int, A2>>;
+    ASSERT_SAME_TYPE(C::difference_type, std::ptrdiff_t);
+    ASSERT_SAME_TYPE(C::size_type, std::size_t);
+    const C c;
+    ASSERT_NOEXCEPT(c.max_size());
+    ASSERT_SAME_TYPE(decltype(c.max_size()), C::size_type);
+    assert(c.max_size() <= 10);        // never above the smaller of the two limits
+    LIBCPP_ASSERT(c.max_size() == 10); // exact value; presumably enforced for libc++ only - see test_macros.h
+  }
+  { // same limits with the allocators swapped: key container 20, mapped container 10
+    using A1 = limited_allocator<int, 10>;
+    using A2 = limited_allocator<int, 20>;
+    using C  = std::flat_map<int, int, std::less<int>, std::vector<int, A2>, std::vector<int, A1>>;
+    ASSERT_SAME_TYPE(C::difference_type, std::ptrdiff_t);
+    ASSERT_SAME_TYPE(C::size_type, std::size_t);
+    const C c;
+    ASSERT_NOEXCEPT(c.max_size());
+    ASSERT_SAME_TYPE(decltype(c.max_size()), C::size_type);
+    assert(c.max_size() <= 10);
+    LIBCPP_ASSERT(c.max_size() == 10);
+  }
+  { // allocator limit is the largest std::size_t value; the bound checked is difference_type's maximum
+    using A = limited_allocator<int, static_cast<std::size_t>(-1)>;
+    using C = std::flat_map<int, int, std::less<int>, std::vector<int, A>, std::vector<int, A>>;
+    ASSERT_SAME_TYPE(C::difference_type, std::ptrdiff_t);
+    ASSERT_SAME_TYPE(C::size_type, std::size_t);
+    const C::size_type max_dist = static_cast<C::size_type>(std::numeric_limits<C::difference_type>::max());
+    const C c;
+    ASSERT_NOEXCEPT(c.max_size());
+    ASSERT_SAME_TYPE(decltype(c.max_size()), C::size_type);
+    assert(c.max_size() <= max_dist);
+    LIBCPP_ASSERT(c.max_size() == max_dist);
+  }
+  { // default containers and allocators
+    using C = std::flat_map<char, char>;
+    ASSERT_SAME_TYPE(C::difference_type, std::ptrdiff_t);
+    ASSERT_SAME_TYPE(C::size_type, std::size_t);
+    const C::size_type max_dist = static_cast<C::size_type>(std::numeric_limits<C::difference_type>::max());
+    const C c;
+    ASSERT_NOEXCEPT(c.max_size());
+    ASSERT_SAME_TYPE(decltype(c.max_size()), C::size_type);
+    assert(c.max_size() <= max_dist);
+    assert(c.max_size() <= alloc_max_size(std::allocator<char>()));
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/size.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/size.pass.cpp
new file mode 100644
index 000000000000000..957a860450091f9
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.capacity/size.pass.cpp
@@ -0,0 +1,65 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// size_type size() const noexcept;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks size() for duplicate-key, single-element, empty and large flat_maps.
+  using M = std::flat_map<int, char, std::less<int>, KeyContainer, ValueContainer>;
+  {
+    const M m = {{1, 'a'}, {1, 'b'}, {4, 'd'}, {5, 'e'}, {5, 'h'}}; // keys 1 and 5 appear twice
+    ASSERT_SAME_TYPE(decltype(m.size()), std::size_t);
+    ASSERT_NOEXCEPT(m.size());
+    assert(m.size() == 3); // only the three distinct keys are counted
+  }
+  {
+    const M m = {{1, 'a'}};
+    ASSERT_SAME_TYPE(decltype(m.size()), std::size_t);
+    ASSERT_NOEXCEPT(m.size());
+    assert(m.size() == 1);
+  }
+  {
+    const M m;
+    ASSERT_SAME_TYPE(decltype(m.size()), std::size_t);
+    ASSERT_NOEXCEPT(m.size());
+    assert(m.size() == 0);
+  }
+  {
+    M m;
+    std::size_t s = 1000000;
+    for (auto i = 0u; i < s; ++i) { // emplaces keys 0 .. s-1 in increasing order
+      m.emplace(i, 'a');
+    }
+    ASSERT_SAME_TYPE(decltype(m.size()), std::size_t);
+    ASSERT_NOEXCEPT(m.size());
+    assert(m.size() == s);
+  }
+}
+
+int main(int, char**) { // Runs test() once for each key/value container pairing.
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/alloc.pass.cpp
new file mode 100644
index 000000000000000..3f8d2ed332d6b32
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/alloc.pass.cpp
@@ -0,0 +1,72 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class Allocator>
+//   explicit flat_map(const Allocator& a);
+
+#include <cassert>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "test_macros.h"
+#include "test_allocator.h"
+#include "../../../test_compare.h"
+
+int main(int, char**) { // Checks the allocator-only constructor: constraints, explicitness, allocator propagation.
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_map<int, int, C, V1, V1>;
+    using M2 = std::flat_map<int, int, C, V1, V2>;
+    using M3 = std::flat_map<int, int, C, V2, V1>;
+    static_assert(std::is_constructible_v<M1, const A1&>);  // both containers use A1
+    static_assert(!std::is_constructible_v<M1, const A2&>); // neither container uses A2
+    static_assert(!std::is_constructible_v<M2, const A2&>); // only the mapped container uses A2
+    static_assert(!std::is_constructible_v<M3, const A2&>); // only the key container uses A2
+  }
+  {
+    // explicit
+    using M =
+        std::flat_map<int,
+                      long,
+                      std::less<int>,
+                      std::vector<int, test_allocator<int>>,
+                      std::vector<long, test_allocator<long>>>;
+
+    static_assert(std::is_constructible_v<M, test_allocator<int>>);
+    static_assert(!std::is_convertible_v<test_allocator<int>, M>); // constructible but not implicitly convertible
+  }
+  {
+    using A = test_allocator<short>; // value type differs from both containers' allocators
+    using M =
+        std::flat_map<int,
+                      long,
+                      std::less<int>,
+                      std::vector<int, test_allocator<int>>,
+                      std::vector<long, test_allocator<long>>>;
+    M m(A(0, 5));
+    assert(m.empty());
+    assert(m.begin() == m.end());
+    assert(m.keys().get_allocator().get_id() == 5);   // id 5 must reach the key container
+    assert(m.values().get_allocator().get_id() == 5); // ... and the mapped container
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/assign_initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/assign_initializer_list.pass.cpp
new file mode 100644
index 000000000000000..06bde71e79941e8
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/assign_initializer_list.pass.cpp
@@ -0,0 +1,59 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map& operator=(initializer_list<value_type> il);
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <ranges>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+#include "test_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks assignment from an initializer_list, with and without duplicate keys.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  {
+    M m = {{8, 8}, {10, 10}};
+    assert(m.size() == 2);
+    m                              = {{3, 0}, {1, 0}, {2, 0}, {2, 1}, {3, 1}, {4, 0}, {3, 2}, {5, 0}, {6, 0}, {5, 1}};
+    std::pair<int, int> expected[] = {{1, 0}, {2, 0}, {3, 0}, {4, 0}, {5, 0}, {6, 0}};
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>)); // keys end up sorted and unique
+    LIBCPP_ASSERT(std::ranges::equal(m, expected)); // which duplicate's value is kept; presumably libc++-only check
+  }
+  {
+    M m = {{10, 1}, {8, 1}};
+    assert(m.size() == 2);
+    m                                    = {{3, 2}};
+    std::pair<double, double> expected[] = {{3, 2}};
+    assert(std::ranges::equal(m, expected)); // previous contents are fully replaced
+  }
+}
+
+int main(int, char**) { // Runs test() once for each key/value container pairing.
+  test<std::vector<int>, std::vector<int>>();
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/compare.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/compare.pass.cpp
new file mode 100644
index 000000000000000..40a1710f55e4220
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/compare.pass.cpp
@@ -0,0 +1,93 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// explicit flat_map(const key_compare& comp);
+// template <class Alloc>
+//   flat_map(const key_compare& comp, const Alloc& a);
+
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <type_traits>
+#include <vector>
+
+#include "test_macros.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+
+int main(int, char**) { // Checks the comparator and comparator+allocator constructors.
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using M1 = std::flat_map<int, int, C, std::vector<int, A1>, std::vector<int, A1>>;
+    using M2 = std::flat_map<int, int, C, std::vector<int, A1>, std::vector<int, A2>>;
+    using M3 = std::flat_map<int, int, C, std::vector<int, A2>, std::vector<int, A1>>;
+    static_assert(std::is_constructible_v<M1, const C&, const A1&>);  // both containers use A1
+    static_assert(!std::is_constructible_v<M1, const C&, const A2&>); // neither container uses A2
+    static_assert(!std::is_constructible_v<M2, const C&, const A2&>); // only the mapped container uses A2
+    static_assert(!std::is_constructible_v<M3, const C&, const A2&>); // only the key container uses A2
+  }
+  {
+    using C = test_less<int>;
+    auto m  = std::flat_map<int, char*, C>(C(3));
+    assert(m.empty());
+    assert(m.begin() == m.end());
+    assert(m.key_comp() == C(3)); // the supplied comparator is the one stored
+  }
+  {
+    // The one-argument ctor is explicit.
+    using C = test_less<int>;
+    static_assert(std::is_constructible_v<std::flat_map<int, char*, C>, C>);
+    static_assert(!std::is_convertible_v<C, std::flat_map<int, char*, C>>);
+
+    static_assert(std::is_constructible_v<std::flat_map<int, char*>, std::less<int>>);
+    static_assert(!std::is_convertible_v<std::less<int>, std::flat_map<int, char*>>);
+  }
+  {
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    auto m   = std::flat_map<int, short, C, std::vector<int, A1>, std::vector<short, A2>>(C(4), A1(5));
+    assert(m.empty());
+    assert(m.begin() == m.end());
+    assert(m.key_comp() == C(4));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5)); // mapped container's allocator must compare equal to A2(5)
+  }
+  {
+    // explicit(false)
+    using C                                                                    = test_less<int>;
+    using A1                                                                   = test_allocator<int>;
+    using A2                                                                   = test_allocator<short>;
+    std::flat_map<int, short, C, std::deque<int, A1>, std::deque<short, A2>> m = {C(4), A1(5)};
+    assert(m.empty());
+    assert(m.begin() == m.end());
+    assert(m.key_comp() == C(4));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // If an allocator is given, it must be usable by both containers.
+    using A = test_allocator<int>;
+    using M = std::flat_map<int, int, std::less<>, std::vector<int>, std::vector<int, A>>;
+    static_assert(std::is_constructible_v<M, std::less<>>);
+    static_assert(!std::is_constructible_v<M, std::less<>, std::allocator<int>>); // does not match the mapped container's allocator
+    static_assert(!std::is_constructible_v<M, std::less<>, A>);                   // does not match the key container's allocator
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/containers.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/containers.pass.cpp
new file mode 100644
index 000000000000000..812e2c3e4f02a82
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/containers.pass.cpp
@@ -0,0 +1,184 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map(key_container_type key_cont, mapped_container_type mapped_cont,
+//           const key_compare& comp = key_compare());
+// template<class Allocator>
+//   flat_map(const key_container_type& key_cont, const mapped_container_type& mapped_cont,
+//            const Allocator& a);
+// template<class Alloc>
+//   flat_map(const key_container_type& key_cont, const mapped_container_type& mapped_cont,
+//            const key_compare& comp, const Alloc& a);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "min_allocator.h"
+#include "MoveOnly.h"
+#include "test_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+
+struct P { // Pair of ints that can be compared against a std::pair by wrapping each member in MoveOnly.
+  int first;
+  int second;
+  template <class T, class U>
+  bool operator==(const std::pair<T, U>& rhs) const { // equal when both members match rhs's members
+    return MoveOnly(first) == rhs.first && MoveOnly(second) == rhs.second;
+  }
+};
+
+int main(int, char**) { // Checks the constructors that take a key container and a mapped container.
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_map<int, int, C, V1, V1>;
+    using M2 = std::flat_map<int, int, C, V1, V2>;
+    using M3 = std::flat_map<int, int, C, V2, V1>;
+    static_assert(std::is_constructible_v<M1, const V1&, const V1&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, const V1&, const V1&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, const V1&, const V2&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, const V2&, const V1&, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, const V1&, const V1&, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, const V1&, const V1&, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, const V1&, const V2&, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, const V2&, const V1&, const C&, const A2&>);
+  }
+  {
+    // flat_map(key_container_type , mapped_container_type)
+    using M              = std::flat_map<int, char>;
+    std::vector<int> ks  = {1, 1, 1, 2, 2, 3, 2, 3, 3}; // unsorted, with duplicate keys
+    std::vector<char> vs = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    auto m               = M(ks, vs);
+    assert((m.keys() == std::vector<int>{1, 2, 3})); // keys end up sorted and unique
+    LIBCPP_ASSERT((m.values() == std::vector<char>{1, 4, 6})); // which duplicate's value is kept; presumably libc++-only check
+
+    // explicit(false)
+    M m2 = {ks, vs};
+    assert(m2 == m);
+
+    m = M(std::move(ks), std::move(vs));
+    assert(ks.empty()); // it was moved-from
+    assert(vs.empty()); // it was moved-from
+    assert((m.keys() == std::vector<int>{1, 2, 3}));
+    LIBCPP_ASSERT((m.values() == std::vector<char>{1, 4, 6}));
+  }
+  {
+    // flat_map(key_container_type , mapped_container_type)
+    // move-only
+    P expected[] = {{3, 2}, {2, 1}, {1, 3}}; // descending key order, matching std::greater<int> below
+    using Ks     = std::deque<int, min_allocator<int>>;
+    using Vs     = std::vector<MoveOnly, min_allocator<MoveOnly>>;
+    using M      = std::flat_map<int, MoveOnly, std::greater<int>, Ks, Vs>;
+    Ks ks        = {1, 3, 2};
+    Vs vs;
+    vs.push_back(3);
+    vs.push_back(2);
+    vs.push_back(1);
+    auto m = M(std::move(ks), std::move(vs));
+    assert(ks.empty()); // it was moved-from
+    assert(vs.empty()); // it was moved-from
+    assert(std::ranges::equal(m, expected, std::equal_to<>()));
+  }
+  {
+    // flat_map(key_container_type , mapped_container_type)
+    // container's allocators are used
+    using A = test_allocator<int>;
+    using M = std::flat_map<int, int, std::less<int>, std::vector<int, A>, std::deque<int, A>>;
+    auto ks = std::vector<int, A>({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(5));
+    auto vs = std::deque<int, A>({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(6));
+    auto m  = M(std::move(ks), std::move(vs));
+    assert(ks.empty()); // it was moved-from
+    assert(vs.empty()); // it was moved-from
+    assert((m == M{{1, 1}, {2, 2}, {3, 3}}));
+    assert(m.keys().get_allocator() == A(5));   // key container keeps its own allocator
+    assert(m.values().get_allocator() == A(6)); // mapped container keeps its own allocator
+  }
+  {
+    // flat_map(key_container_type , mapped_container_type, key_compare)
+    using C              = test_less<int>;
+    using M              = std::flat_map<int, char, C>;
+    std::vector<int> ks  = {1, 1, 1, 2, 2, 3, 2, 3, 3};
+    std::vector<char> vs = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    auto m               = M(ks, vs, C(4));
+    assert((m.keys() == std::vector<int>{1, 2, 3}));
+    LIBCPP_ASSERT((m.values() == std::vector<char>{1, 4, 6}));
+    assert(m.key_comp() == C(4));
+
+    // explicit(false)
+    M m2 = {ks, vs, C(4)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(4));
+  }
+  {
+    // flat_map(key_container_type , mapped_container_type, const Allocator&)
+    using A = test_allocator<int>;
+    using M = std::flat_map<int, int, std::less<int>, std::vector<int, A>, std::deque<int, A>>;
+    auto ks = std::vector<int, A>({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(5));
+    auto vs = std::deque<int, A>({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(6));
+    auto m  = M(ks, vs, A(4)); // replaces the allocators
+    assert(!ks.empty());       // it was an lvalue above
+    assert(!vs.empty());       // it was an lvalue above
+    assert((m == M{{1, 1}, {2, 2}, {3, 3}}));
+    assert(m.keys().get_allocator() == A(4));
+    assert(m.values().get_allocator() == A(4));
+  }
+  {
+    // flat_map(key_container_type , mapped_container_type, const Allocator&)
+    // explicit(false)
+    using A = test_allocator<int>;
+    using M = std::flat_map<int, int, std::less<int>, std::vector<int, A>, std::deque<int, A>>;
+    auto ks = std::vector<int, A>({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(5));
+    auto vs = std::deque<int, A>({1, 1, 1, 2, 2, 3, 2, 3, 3}, A(6));
+    M m     = {ks, vs, A(4)}; // implicit ctor
+    assert(!ks.empty());      // it was an lvalue above
+    assert(!vs.empty());      // it was an lvalue above
+    assert((m == M{{1, 1}, {2, 2}, {3, 3}}));
+    assert(m.keys().get_allocator() == A(4));
+    assert(m.values().get_allocator() == A(4));
+  }
+  {
+    // flat_map(key_container_type , mapped_container_type, key_compare, const Allocator&)
+    using C                = test_less<int>;
+    using A                = test_allocator<int>;
+    using M                = std::flat_map<int, int, C, std::vector<int, A>, std::vector<int, A>>;
+    std::vector<int, A> ks = {1, 1, 1, 2, 2, 3, 2, 3, 3};
+    std::vector<int, A> vs = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    auto m                 = M(ks, vs, C(4), A(5));
+    assert((m.keys() == std::vector<int, A>{1, 2, 3}));
+    LIBCPP_ASSERT((m.values() == std::vector<int, A>{1, 4, 6}));
+    assert(m.key_comp() == C(4));
+    assert(m.keys().get_allocator() == A(5));
+    assert(m.values().get_allocator() == A(5));
+
+    // explicit(false)
+    M m2 = {ks, vs, C(4), A(5)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(4));
+    assert(m2.keys().get_allocator() == A(5));
+    assert(m2.values().get_allocator() == A(5));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy.pass.cpp
new file mode 100644
index 000000000000000..fcd0415088c1c96
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy.pass.cpp
@@ -0,0 +1,70 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map(const flat_map& m);
+
+#include <cassert>
+#include <flat_map>
+#include <vector>
+
+#include "test_macros.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+
+int main(int, char**) {
+  { // test_allocator: the copy's containers report allocators equal to the source's (6 and 7)
+    using C = test_less<int>;
+    std::vector<int, test_allocator<int>> ks({1, 3, 5}, test_allocator<int>(6));
+    std::vector<char, test_allocator<char>> vs({2, 2, 1}, test_allocator<char>(7));
+    using M = std::flat_map<int, char, C, decltype(ks), decltype(vs)>;
+    auto mo = M(ks, vs, C(5));
+    auto m  = mo; // copy-construct
+
+    assert(m.key_comp() == C(5));
+    assert(m.keys() == ks);
+    assert(m.values() == vs);
+    assert(m.keys().get_allocator() == test_allocator<int>(6));
+    assert(m.values().get_allocator() == test_allocator<char>(7));
+
+    // the source map mo must be left untouched by the copy
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys() == ks);
+    assert(mo.values() == vs);
+    assert(mo.keys().get_allocator() == test_allocator<int>(6));
+    assert(mo.values().get_allocator() == test_allocator<char>(7));
+  }
+  { // other_allocator: the copy's allocators compare equal to other_allocator(-2), not to the source's
+    using C  = test_less<int>;
+    using Ks = std::vector<int, other_allocator<int>>;
+    using Vs = std::vector<char, other_allocator<char>>;
+    auto ks  = Ks({1, 3, 5}, other_allocator<int>(6));
+    auto vs  = Vs({2, 2, 1}, other_allocator<char>(7));
+    using M  = std::flat_map<int, char, C, Ks, Vs>;
+    auto mo  = M(Ks(ks, other_allocator<int>(6)), Vs(vs, other_allocator<char>(7)), C(5));
+    auto m   = mo; // copy-construct
+
+    assert(m.key_comp() == C(5));
+    assert(m.keys() == ks);
+    assert(m.values() == vs);
+    assert(m.keys().get_allocator() == other_allocator<int>(-2)); // NOTE(review): -2 presumably comes from other_allocator's select_on_container_copy_construction -- confirm
+    assert(m.values().get_allocator() == other_allocator<char>(-2));
+
+    // the source map mo must be left untouched by the copy
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys() == ks);
+    assert(mo.values() == vs);
+    assert(mo.keys().get_allocator() == other_allocator<int>(6));
+    assert(mo.values().get_allocator() == other_allocator<char>(7));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_alloc.pass.cpp
new file mode 100644
index 000000000000000..cbda6ea853268af
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_alloc.pass.cpp
@@ -0,0 +1,67 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map(const flat_map&, const allocator_type&);
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "test_macros.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_map<int, int, C, V1, V1>; // both containers use A1
+    using M2 = std::flat_map<int, int, C, V1, V2>; // keys use A1, values use A2
+    using M3 = std::flat_map<int, int, C, V2, V1>; // keys use A2, values use A1
+    static_assert(std::is_constructible_v<M1, const M1&, const A1&>);  // A1 matches both containers
+    static_assert(!std::is_constructible_v<M1, const M1&, const A2&>); // A2 matches neither container
+    static_assert(!std::is_constructible_v<M2, const M2&, const A2&>); // A2 does not match the key container
+    static_assert(!std::is_constructible_v<M3, const M3&, const A2&>); // A2 does not match the mapped container
+  }
+  { // the supplied allocator (3) ends up in both containers of the copy; mo keeps 6 and 7
+    using C = test_less<int>;
+    std::vector<int, test_allocator<int>> ks({1, 3, 5}, test_allocator<int>(6));
+    std::vector<char, test_allocator<char>> vs({2, 2, 1}, test_allocator<char>(7));
+    using M = std::flat_map<int, char, C, decltype(ks), decltype(vs)>;
+    auto mo = M(ks, vs, C(5));
+    auto m  = M(mo, test_allocator<int>(3)); // copy with an explicit allocator
+
+    assert(m.key_comp() == C(5));
+    assert(m.keys() == ks);
+    assert(m.values() == vs);
+    assert(m.keys().get_allocator() == test_allocator<int>(3));
+    assert(m.values().get_allocator() == test_allocator<char>(3));
+
+    // the source map mo must be left untouched by the copy
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys() == ks);
+    assert(mo.values() == vs);
+    assert(mo.keys().get_allocator() == test_allocator<int>(6));
+    assert(mo.values().get_allocator() == test_allocator<char>(7));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.addressof.compile.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.addressof.compile.pass.cpp
new file mode 100644
index 000000000000000..e9b752d5eb12b04
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.addressof.compile.pass.cpp
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map& operator=(const flat_map& s);
+
+// Validate that the container can be copy-assigned, move-assigned, and swapped
+// when its key and mapped types provide an ADL-hijacking operator&
+
+#include <flat_map>
+#include <utility>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() { // compile-only: each statement below just has to be well-formed
+  std::flat_map<operator_hijacker, operator_hijacker> so;
+  std::flat_map<operator_hijacker, operator_hijacker> s;
+  s = so;            // copy assignment
+  s = std::move(so); // move assignment
+  swap(s, so);       // unqualified call, so swap is found through ADL
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.pass.cpp
new file mode 100644
index 000000000000000..4f9797d5bf810af
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/copy_assign.pass.cpp
@@ -0,0 +1,92 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map& operator=(const flat_map& m);
+
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "test_macros.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+
+int main(int, char**) {
+  {
+    // test_allocator is not propagated: m keeps its own allocator (2) after assignment
+    using C = test_less<int>;
+    std::vector<int, test_allocator<int>> ks({1, 3, 5}, test_allocator<int>(6));
+    std::vector<char, test_allocator<char>> vs({2, 2, 1}, test_allocator<char>(7));
+    using M = std::flat_map<int, char, C, decltype(ks), decltype(vs)>;
+    auto mo = M(ks, vs, C(5));
+    auto m  = M({{3, 3}, {4, 4}, {5, 5}}, C(3), test_allocator<int>(2));
+    m       = mo; // copy-assign
+
+    assert(m.key_comp() == C(5));
+    assert(m.keys() == ks);
+    assert(m.values() == vs);
+    assert(m.keys().get_allocator() == test_allocator<int>(2));
+    assert(m.values().get_allocator() == test_allocator<char>(2));
+
+    // the source map mo must be left untouched by the assignment
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys() == ks);
+    assert(mo.values() == vs);
+    assert(mo.keys().get_allocator() == test_allocator<int>(6));
+    assert(mo.values().get_allocator() == test_allocator<char>(7));
+  }
+  {
+    // other_allocator is propagated: m takes over the source's allocators (6 and 7)
+    using C  = test_less<int>;
+    using Ks = std::vector<int, other_allocator<int>>;
+    using Vs = std::vector<char, other_allocator<char>>;
+    auto ks  = Ks({1, 3, 5}, other_allocator<int>(6));
+    auto vs  = Vs({2, 2, 1}, other_allocator<char>(7));
+    using M  = std::flat_map<int, char, C, Ks, Vs>;
+    auto mo  = M(Ks(ks, other_allocator<int>(6)), Vs(vs, other_allocator<char>(7)), C(5));
+    auto m   = M({{3, 3}, {4, 4}, {5, 5}}, C(3), other_allocator<int>(2));
+    m        = mo; // copy-assign
+
+    assert(m.key_comp() == C(5));
+    assert(m.keys() == ks);
+    assert(m.values() == vs);
+    assert(m.keys().get_allocator() == other_allocator<int>(6));
+    assert(m.values().get_allocator() == other_allocator<char>(7));
+
+    // the source map mo must be left untouched by the assignment
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys() == ks);
+    assert(mo.values() == vs);
+    assert(mo.keys().get_allocator() == other_allocator<int>(6));
+    assert(mo.values().get_allocator() == other_allocator<char>(7));
+  }
+  {
+    // comparator is copied and invariant is preserved
+    using M = std::flat_map<int, int, std::function<bool(int, int)>>;
+    M mo    = M({{1, 2}, {3, 4}}, std::less<int>());
+    M m     = M({{1, 2}, {3, 4}}, std::greater<int>());
+    assert(m.key_comp()(2, 1) == true); // m still orders with greater
+    assert(m != mo);
+    m = mo;
+    assert(m.key_comp()(2, 1) == false); // m now orders with less, like mo
+    assert(m == mo);
+  }
+  {
+    // self-assignment
+    using M = std::flat_map<int, int>;
+    M m     = {{1, 2}, {3, 4}};
+    m       = static_cast<const M&>(m); // cast selects the copy-assignment overload
+    assert((m == M{{1, 2}, {3, 4}}));
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.pass.cpp
new file mode 100644
index 000000000000000..d01bee9aae9c086
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.pass.cpp
@@ -0,0 +1,342 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <deque>
+#include <initializer_list>
+#include <list>
+#include <flat_map>
+#include <functional>
+#include <ranges>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "deduction_guides_sfinae_checks.h"
+#include "test_allocator.h"
+
+using P  = std::pair<int, long>;
+using PC = std::pair<const int, long>;
+
+void test_copy() { // CTAD from an existing flat_map must deduce that same flat_map type
+  {
+    std::flat_map<long, short> source = {{1, 2}, {2, 3}};
+    std::flat_map s(source); // parentheses
+    ASSERT_SAME_TYPE(decltype(s), decltype(source));
+    assert(s == source);
+  }
+  {
+    std::flat_map<long, short, std::greater<long>> source = {{1, 2}, {2, 3}};
+    std::flat_map s{source}; // braces instead of parens
+    ASSERT_SAME_TYPE(decltype(s), decltype(source));
+    assert(s == source);
+  }
+  {
+    std::flat_map<long, short, std::greater<long>> source = {{1, 2}, {2, 3}};
+    std::flat_map s(source, std::allocator<int>()); // copy plus an allocator argument
+    ASSERT_SAME_TYPE(decltype(s), decltype(source));
+    assert(s == source);
+  }
+}
+
+void test_containers() { // CTAD from a key container and a mapped container, default comparator
+  std::deque<int, test_allocator<int>> ks({1, 2, 1, INT_MAX, 3}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> vs({1, 2, 1, 4, 5}, test_allocator<short>(0, 43));
+  std::deque<int, test_allocator<int>> sorted_ks({1, 2, 3, INT_MAX}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> sorted_vs({1, 2, 5, 4}, test_allocator<short>(0, 43));
+  const std::pair<int, short> expected[] = {{1, 1}, {2, 2}, {3, 5}, {INT_MAX, 4}};
+  { // (keys, values): allocator ids 42 and 43 come from the containers
+    std::flat_map s(ks, vs);
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, short, std::less<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 42);
+    assert(s.values().get_allocator().get_id() == 43);
+  }
+  { // (sorted_unique, keys, values)
+    std::flat_map s(std::sorted_unique, sorted_ks, sorted_vs);
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, short, std::less<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 42);
+    assert(s.values().get_allocator().get_id() == 43);
+  }
+  { // (keys, values, allocator): both containers end up with allocator id 44
+    std::flat_map s(ks, vs, test_allocator<long>(0, 44));
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, short, std::less<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 44);
+    assert(s.values().get_allocator().get_id() == 44);
+  }
+  { // (sorted_unique, keys, values, allocator)
+    std::flat_map s(std::sorted_unique, sorted_ks, sorted_vs, test_allocator<long>(0, 44));
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, short, std::less<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 44);
+    assert(s.values().get_allocator().get_id() == 44);
+  }
+}
+
+void test_containers_compare() { // same as test_containers, with an explicit std::greater<int> comparator
+  std::deque<int, test_allocator<int>> ks({1, 2, 1, INT_MAX, 3}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> vs({1, 2, 1, 4, 5}, test_allocator<short>(0, 43));
+  std::deque<int, test_allocator<int>> sorted_ks({INT_MAX, 3, 2, 1}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> sorted_vs({4, 5, 2, 1}, test_allocator<short>(0, 43));
+  const std::pair<int, short> expected[] = {{INT_MAX, 4}, {3, 5}, {2, 2}, {1, 1}};
+  { // (keys, values, compare): allocator ids 42 and 43 come from the containers
+    std::flat_map s(ks, vs, std::greater<int>());
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, short, std::greater<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 42);
+    assert(s.values().get_allocator().get_id() == 43);
+  }
+  { // (sorted_unique, keys, values, compare)
+    std::flat_map s(std::sorted_unique, sorted_ks, sorted_vs, std::greater<int>());
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, short, std::greater<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 42);
+    assert(s.values().get_allocator().get_id() == 43);
+  }
+  { // (keys, values, compare, allocator): both containers end up with allocator id 44
+    std::flat_map s(ks, vs, std::greater<int>(), test_allocator<long>(0, 44));
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, short, std::greater<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 44);
+    assert(s.values().get_allocator().get_id() == 44);
+  }
+  { // (sorted_unique, keys, values, compare, allocator)
+    std::flat_map s(std::sorted_unique, sorted_ks, sorted_vs, std::greater<int>(), test_allocator<long>(0, 44));
+
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, short, std::greater<int>, decltype(ks), decltype(vs)>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 44);
+    assert(s.values().get_allocator().get_id() == 44);
+  }
+}
+
+void test_iter_iter() { // CTAD from an iterator pair, default comparator
+  const P arr[]          = {{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}};
+  const P sorted_arr[]   = {{1, 1L}, {2, 2L}, {3, 1L}, {INT_MAX, 1L}};
+  const PC arrc[]        = {{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}};
+  const PC sorted_arrc[] = {{1, 1L}, {2, 2L}, {3, 1L}, {INT_MAX, 1L}};
+  { // elements are pair<int, long>
+    std::flat_map m(std::begin(arr), std::end(arr));
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, long>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  { // elements are pair<const int, long>; the deduced key type is still int
+    std::flat_map m(std::begin(arrc), std::end(arrc));
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, long>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  { // sorted_unique tag, elements are pair<int, long>
+    std::flat_map m(std::sorted_unique, std::begin(sorted_arr), std::end(sorted_arr));
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, long>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  { // sorted_unique tag, elements are pair<const int, long>
+    std::flat_map m(std::sorted_unique, std::begin(sorted_arrc), std::end(sorted_arrc));
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, long>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  { // flat_map's own iterators
+    std::flat_map<int, short> mo;
+    std::flat_map m(mo.begin(), mo.end());
+    ASSERT_SAME_TYPE(decltype(m), decltype(mo));
+  }
+  { // flat_map's own const iterators
+    std::flat_map<int, short> mo;
+    std::flat_map m(mo.cbegin(), mo.cend());
+    ASSERT_SAME_TYPE(decltype(m), decltype(mo));
+  }
+}
+
+void test_iter_iter_compare() { // CTAD from an iterator pair plus an explicit comparator
+  const P arr[]          = {{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}};
+  const P sorted_arr[]   = {{INT_MAX, 1L}, {3, 1L}, {2, 2L}, {1, 1L}};
+  const PC arrc[]        = {{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}};
+  const PC sorted_arrc[] = {{INT_MAX, 1L}, {3, 1L}, {2, 2L}, {1, 1L}};
+  using C                = std::greater<long long>; // comparator's argument type differs from the int key type
+  { // elements are pair<int, long>
+    std::flat_map m(std::begin(arr), std::end(arr), C());
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, long, C>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  { // elements are pair<const int, long>; the deduced key type is still int
+    std::flat_map m(std::begin(arrc), std::end(arrc), C());
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, long, C>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  { // sorted_unique tag, elements are pair<int, long>
+    std::flat_map m(std::sorted_unique, std::begin(sorted_arr), std::end(sorted_arr), C());
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, long, C>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  { // sorted_unique tag, elements are pair<const int, long>
+    std::flat_map m(std::sorted_unique, std::begin(sorted_arrc), std::end(sorted_arrc), C());
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, long, C>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  { // flat_map's own iterators; only the comparator differs from mo's type
+    std::flat_map<int, short> mo;
+    std::flat_map m(mo.begin(), mo.end(), C());
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, short, C>);
+  }
+  { // flat_map's own const iterators
+    std::flat_map<int, short> mo;
+    std::flat_map m(mo.cbegin(), mo.cend(), C());
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, short, C>);
+  }
+}
+
+void test_initializer_list() { // CTAD from a braced list whose first element is spelled as std::pair
+  const P sorted_arr[] = {{1, 1L}, {2, 2L}, {3, 1L}, {INT_MAX, 1L}};
+  { // unsorted input with a duplicate key
+    std::flat_map m{std::pair{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}};
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, long>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  { // sorted_unique tag with already sorted, duplicate-free input
+    std::flat_map m(std::sorted_unique, {std::pair{1, 1L}, {2, 2L}, {3, 1L}, {INT_MAX, 1L}});
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, long>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+}
+
+void test_initializer_list_compare() { // CTAD from a braced list plus an explicit comparator
+  const P sorted_arr[] = {{INT_MAX, 1L}, {3, 1L}, {2, 2L}, {1, 1L}};
+  using C              = std::greater<long long>; // comparator's argument type differs from the int key type
+  { // unsorted input with a duplicate key
+    std::flat_map m({std::pair{1, 1L}, {2, 2L}, {1, 1L}, {INT_MAX, 1L}, {3, 1L}}, C());
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, long, C>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+  { // sorted_unique tag; input is already in descending order, matching C
+    std::flat_map m(std::sorted_unique, {std::pair{INT_MAX, 1L}, {3, 1L}, {2, 2L}, {1, 1L}}, C());
+
+    ASSERT_SAME_TYPE(decltype(m), std::flat_map<int, long, C>);
+    assert(std::ranges::equal(m, sorted_arr));
+  }
+}
+
+void test_from_range() { // CTAD from (from_range, range), default comparator
+  std::list<std::pair<int, short>> r     = {{1, 1}, {2, 2}, {1, 1}, {INT_MAX, 4}, {3, 5}};
+  const std::pair<int, short> expected[] = {{1, 1}, {2, 2}, {3, 5}, {INT_MAX, 4}};
+  { // no allocator: default containers are deduced
+    std::flat_map s(std::from_range, r);
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, short, std::less<int>>);
+    assert(std::ranges::equal(s, expected));
+  }
+  { // test_allocator<long> argument: deduced containers are vectors using test_allocator<int> / test_allocator<short>
+    std::flat_map s(std::from_range, r, test_allocator<long>(0, 42));
+    ASSERT_SAME_TYPE(
+        decltype(s),
+        std::flat_map<int,
+                      short,
+                      std::less<int>,
+                      std::vector<int, test_allocator<int>>,
+                      std::vector<short, test_allocator<short>>>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 42);
+    assert(s.values().get_allocator().get_id() == 42);
+  }
+}
+
+void test_from_range_compare() { // CTAD from (from_range, range, compare)
+  std::list<std::pair<int, short>> r     = {{1, 1}, {2, 2}, {1, 1}, {INT_MAX, 4}, {3, 5}};
+  const std::pair<int, short> expected[] = {{INT_MAX, 4}, {3, 5}, {2, 2}, {1, 1}};
+  { // no allocator: default containers are deduced
+    std::flat_map s(std::from_range, r, std::greater<int>());
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, short, std::greater<int>>);
+    assert(std::ranges::equal(s, expected));
+  }
+  { // test_allocator<long> argument: deduced containers are vectors using test_allocator<int> / test_allocator<short>
+    std::flat_map s(std::from_range, r, std::greater<int>(), test_allocator<long>(0, 42));
+    ASSERT_SAME_TYPE(
+        decltype(s),
+        std::flat_map<int,
+                      short,
+                      std::greater<int>,
+                      std::vector<int, test_allocator<int>>,
+                      std::vector<short, test_allocator<short>>>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().get_id() == 42);
+    assert(s.values().get_allocator().get_id() == 42);
+  }
+}
+
+int main(int, char**) {
+  // Where applicable, a test function also covers the sorted_unique-prefixed and allocator-suffixed overloads.
+  test_copy();
+  test_containers();
+  test_containers_compare();
+  test_iter_iter();
+  test_iter_iter_compare();
+  test_initializer_list();
+  test_initializer_list_compare();
+  test_from_range();
+  test_from_range_compare();
+
+  AssociativeContainerDeductionGuidesSfinaeAway<std::flat_map, std::flat_map<int, short>>();
+  {
+    std::flat_map s = {std::make_pair(1, 'a')}; // flat_map(initializer_list<pair<int, char>>)
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, char>);
+    assert(s.size() == 1);
+  }
+  {
+    using M = std::flat_map<int, short>;
+    M m;
+    std::flat_map s = {std::make_pair(m, m)}; // flat_map(initializer_list<pair<M, M>>)
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<M, M>);
+    assert(s.size() == 1);
+    assert(s[m] == m);
+  }
+
+  {
+    std::pair<int, int> source[3] = {{1, 1}, {2, 2}, {3, 3}};
+    std::flat_map s               = {source, source + 3}; // flat_map(InputIterator, InputIterator)
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, int>);
+    assert(s.size() == 3);
+  }
+  {
+    std::pair<int, int> source[3] = {{1, 1}, {2, 2}, {3, 3}};
+    std::flat_map s{source, source + 3}; // flat_map(InputIterator, InputIterator)
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, int>);
+    assert(s.size() == 3);
+  }
+  {
+    std::pair<int, int> source[3] = {{1, 1}, {2, 2}, {3, 3}};
+    std::flat_map s{std::sorted_unique, source, source + 3}; // flat_map(sorted_unique_t, InputIterator, InputIterator)
+    ASSERT_SAME_TYPE(decltype(s), std::flat_map<int, int>);
+    assert(s.size() == 3);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.verify.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.verify.cpp
new file mode 100644
index 000000000000000..08244f01cb24e13
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct.verify.cpp
@@ -0,0 +1,97 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// Test CTAD on cases where deduction should fail.
+
+#include <flat_map>
+#include <functional>
+#include <memory>
+#include <utility>
+#include <vector>
+
+struct NotAnAllocator { // unrelated type used by test(); it only provides operator<
+  friend bool operator<(NotAnAllocator, NotAnAllocator) { return false; }
+};
+
+using P  = std::pair<int, long>;
+using PC = std::pair<const int, long>;
+
+void test() { // every flat_map declaration below must be rejected by class template argument deduction
+  {
+    // cannot deduce Key and T from just (KeyContainer), even if it's a container of pairs
+    std::vector<std::pair<int, int>> v;
+    std::flat_map s(v);
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+  {
+    // cannot deduce Key and T from just (KeyContainer, Allocator)
+    std::vector<int> v;
+    std::flat_map s(v, std::allocator<std::pair<const int, int>>());
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+  {
+    // cannot deduce Key and T from nothing
+    std::flat_map m;
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+  {
+    // cannot deduce Key and T from just (Compare)
+    std::flat_map m(std::less<int>{});
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+  {
+    // cannot deduce Key and T from just (Compare, Allocator)
+    std::flat_map m(std::less<int>{}, std::allocator<PC>{});
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+  {
+    // cannot deduce Key and T from just (Allocator)
+    std::flat_map m(std::allocator<PC>{});
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+  {
+    // cannot convert from some arbitrary unrelated type
+    NotAnAllocator a;
+    std::flat_map m(a);
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+  {
+    // cannot deduce that the inner braced things should be std::pair and not something else
+    std::flat_map m{{1, 1L}, {2, 2L}, {3, 3L}};
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+  {
+    // cannot deduce that the inner braced things should be std::pair and not something else
+    std::flat_map m({{1, 1L}, {2, 2L}, {3, 3L}}, std::less<int>());
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+  {
+    // cannot deduce that the inner braced things should be std::pair and not something else
+    std::flat_map m({{1, 1L}, {2, 2L}, {3, 3L}}, std::less<int>(), std::allocator<PC>());
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+  {
+    // cannot deduce that the inner braced things should be std::pair and not something else
+    std::flat_map m({{1, 1L}, {2, 2L}, {3, 3L}}, std::allocator<PC>());
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+  {
+    // since we have parens, not braces, this deliberately does not find the initializer_list constructor
+    std::flat_map m(P{1, 1L});
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+  {
+    // since we have parens, not braces, this deliberately does not find the initializer_list constructor
+    std::flat_map m(PC{1, 1L});
+    // expected-error-re@-1{{{{no viable constructor or deduction guide for deduction of template arguments of '.*flat_map'}}}}
+  }
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct_pmr.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct_pmr.pass.cpp
new file mode 100644
index 000000000000000..11c18ac13c76a92
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/deduct_pmr.pass.cpp
@@ -0,0 +1,106 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// UNSUPPORTED: availability-pmr-missing
+
+// <flat_map>
+
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <deque>
+#include <initializer_list>
+#include <list>
+#include <flat_map>
+#include <functional>
+#include <memory_resource>
+#include <ranges>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include "test_allocator.h"
+
+using P  = std::pair<int, long>;
+using PC = std::pair<const int, long>;
+
+void test_containers() { // CTAD from pmr containers plus a memory_resource*, default comparator
+  std::deque<int, test_allocator<int>> ks({1, 2, 1, INT_MAX, 3}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> vs({1, 2, 1, 4, 5}, test_allocator<short>(0, 43));
+  std::deque<int, test_allocator<int>> sorted_ks({1, 2, 3, INT_MAX}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> sorted_vs({1, 2, 5, 4}, test_allocator<short>(0, 43));
+  const std::pair<int, short> expected[] = {{1, 1}, {2, 2}, {3, 5}, {INT_MAX, 4}};
+  { // (keys, values, resource): both containers must end up using mr2, not mr
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::monotonic_buffer_resource mr2;
+    std::pmr::deque<int> pks(ks.begin(), ks.end(), &mr);
+    std::pmr::deque<short> pvs(vs.begin(), vs.end(), &mr);
+    std::flat_map s(std::move(pks), std::move(pvs), &mr2);
+
+    ASSERT_SAME_TYPE(
+        decltype(s), std::flat_map<int, short, std::less<int>, std::pmr::deque<int>, std::pmr::deque<short>>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().resource() == &mr2);
+    assert(s.values().get_allocator().resource() == &mr2);
+  }
+  { // (sorted_unique, keys, values, resource)
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::monotonic_buffer_resource mr2;
+    std::pmr::deque<int> pks(sorted_ks.begin(), sorted_ks.end(), &mr);
+    std::pmr::deque<short> pvs(sorted_vs.begin(), sorted_vs.end(), &mr);
+    std::flat_map s(std::sorted_unique, std::move(pks), std::move(pvs), &mr2);
+
+    ASSERT_SAME_TYPE(
+        decltype(s), std::flat_map<int, short, std::less<int>, std::pmr::deque<int>, std::pmr::deque<short>>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().resource() == &mr2);
+    assert(s.values().get_allocator().resource() == &mr2);
+  }
+}
+
+void test_containers_compare() { // same as test_containers, with an explicit std::greater<int> comparator
+  std::deque<int, test_allocator<int>> ks({1, 2, 1, INT_MAX, 3}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> vs({1, 2, 1, 4, 5}, test_allocator<short>(0, 43));
+  std::deque<int, test_allocator<int>> sorted_ks({INT_MAX, 3, 2, 1}, test_allocator<int>(0, 42));
+  std::deque<short, test_allocator<short>> sorted_vs({4, 5, 2, 1}, test_allocator<short>(0, 43));
+  const std::pair<int, short> expected[] = {{INT_MAX, 4}, {3, 5}, {2, 2}, {1, 1}};
+  { // (keys, values, compare, resource): both containers must end up using mr2, not mr
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::monotonic_buffer_resource mr2;
+    std::pmr::deque<int> pks(ks.begin(), ks.end(), &mr);
+    std::pmr::deque<short> pvs(vs.begin(), vs.end(), &mr);
+    std::flat_map s(std::move(pks), std::move(pvs), std::greater<int>(), &mr2);
+
+    ASSERT_SAME_TYPE(
+        decltype(s), std::flat_map<int, short, std::greater<int>, std::pmr::deque<int>, std::pmr::deque<short>>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().resource() == &mr2);
+    assert(s.values().get_allocator().resource() == &mr2);
+  }
+  { // (sorted_unique, keys, values, compare, resource)
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::monotonic_buffer_resource mr2;
+    std::pmr::deque<int> pks(sorted_ks.begin(), sorted_ks.end(), &mr);
+    std::pmr::deque<short> pvs(sorted_vs.begin(), sorted_vs.end(), &mr);
+    std::flat_map s(std::sorted_unique, std::move(pks), std::move(pvs), std::greater<int>(), &mr2);
+
+    ASSERT_SAME_TYPE(
+        decltype(s), std::flat_map<int, short, std::greater<int>, std::pmr::deque<int>, std::pmr::deque<short>>);
+    assert(std::ranges::equal(s, expected));
+    assert(s.keys().get_allocator().resource() == &mr2);
+    assert(s.values().get_allocator().resource() == &mr2);
+  }
+}
+
+int main(int, char**) {
+  test_containers();         // pmr containers, default comparator
+  test_containers_compare(); // pmr containers, std::greater<int> comparator
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default.pass.cpp
new file mode 100644
index 000000000000000..c5b94896b92931c
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default.pass.cpp
@@ -0,0 +1,72 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map();
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <type_traits>
+#include <vector>
+
+#include "test_macros.h"
+#include "min_allocator.h"
+#include "test_allocator.h"
+
+struct DefaultCtableComp { // comparator that records whether its default constructor ran
+  explicit DefaultCtableComp() { default_constructed_ = true; }
+  bool operator()(int, int) const { return false; } // no key is ever "less", so all keys are equivalent
+  bool default_constructed_ = false; // set to true only by the default constructor
+};
+
+int main(int, char**) {
+  { // a default-initialized map is empty
+    std::flat_map<int, char*> m;
+    assert(m.empty());
+  }
+  {
+    // explicit(false)
+    std::flat_map<int, char*> m = {};
+    assert(m.empty());
+  }
+  { // custom comparator and deque keys with min_allocator: the comparator must be default constructed
+    std::flat_map<int, char*, DefaultCtableComp, std::deque<int, min_allocator<int>>> m;
+    assert(m.empty());
+    assert(m.begin() == m.end());
+    assert(m.key_comp().default_constructed_);
+  }
+  { // containers whose allocator is explicit_allocator
+    using A1 = explicit_allocator<int>;
+    using A2 = explicit_allocator<char*>;
+    { // no constructor arguments
+      std::flat_map<int, char*, DefaultCtableComp, std::vector<int, A1>, std::vector<char*, A2>> m;
+      assert(m.empty());
+      assert(m.key_comp().default_constructed_);
+    }
+    { // allocator-only construction still default constructs the comparator
+      A1 a1;
+      std::flat_map<int, int, DefaultCtableComp, std::vector<int, A1>, std::vector<int, A1>> m(a1);
+      assert(m.empty());
+      assert(m.key_comp().default_constructed_);
+    }
+  }
+  {
+    // If an allocator is given, it must be usable by both containers.
+    using A = test_allocator<int>;
+    using M = std::flat_map<int, int, std::less<>, std::vector<int>, std::vector<int, A>>;
+    static_assert(std::is_constructible_v<M>);
+    static_assert(!std::is_constructible_v<M, std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M, A>);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default_noexcept.pass.cpp
new file mode 100644
index 000000000000000..ac24c8a8ac067ee
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/default_noexcept.pass.cpp
@@ -0,0 +1,57 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map()
+//    noexcept(
+//        is_nothrow_default_constructible_v<key_container_type> &&
+//        is_nothrow_default_constructible_v<mapped_container_type> &&
+//        is_nothrow_default_constructible_v<key_compare>);
+
+// This tests a conforming extension
+
+#include <cassert>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "test_macros.h"
+#include "MoveOnly.h"
+#include "test_allocator.h"
+
+struct ThrowingCtorComp { // comparator whose default constructor is declared potentially throwing
+  ThrowingCtorComp() noexcept(false) {}
+  bool operator()(const auto&, const auto&) const { return false; }
+};
+
+int main(int, char**) {
+#if defined(_LIBCPP_VERSION)
+  { // default containers and comparator
+    using C = std::flat_map<MoveOnly, MoveOnly>;
+    static_assert(std::is_nothrow_default_constructible_v<C>);
+  }
+  { // key container using test_allocator
+    using C = std::flat_map<MoveOnly, MoveOnly, std::less<MoveOnly>, std::vector<MoveOnly, test_allocator<MoveOnly>>>;
+    static_assert(std::is_nothrow_default_constructible_v<C>);
+  }
+#endif // _LIBCPP_VERSION
+  { // key container using other_allocator
+    using C = std::flat_map<MoveOnly, MoveOnly, std::less<MoveOnly>, std::vector<MoveOnly, other_allocator<MoveOnly>>>;
+    static_assert(!std::is_nothrow_default_constructible_v<C>);
+    C c; // default construction must still compile
+  }
+  { // comparator with a noexcept(false) default constructor
+    using C = std::flat_map<MoveOnly, MoveOnly, ThrowingCtorComp>;
+    static_assert(!std::is_nothrow_default_constructible_v<C>);
+    C c; // default construction must still compile
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/dtor_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/dtor_noexcept.pass.cpp
new file mode 100644
index 000000000000000..e3ab33a55d95bf5
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/dtor_noexcept.pass.cpp
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// ~flat_map();
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "test_macros.h"
+#include "MoveOnly.h"
+#include "test_allocator.h"
+
+struct ThrowingDtorComp { // comparator with a noexcept(false) destructor; members are declared only
+  bool operator()(const auto&, const auto&) const;
+  ~ThrowingDtorComp() noexcept(false);
+};
+
+int main(int, char**) {
+  { // default containers and comparator
+    using C = std::flat_map<MoveOnly, MoveOnly>;
+    static_assert(std::is_nothrow_destructible_v<C>);
+  }
+  { // vector containers using test_allocator
+    using V = std::vector<MoveOnly, test_allocator<MoveOnly>>;
+    using C = std::flat_map<MoveOnly, MoveOnly, std::less<MoveOnly>, V, V>;
+    static_assert(std::is_nothrow_destructible_v<C>);
+  }
+  { // deque containers using other_allocator, std::greater comparator
+    using V = std::deque<MoveOnly, other_allocator<MoveOnly>>;
+    using C = std::flat_map<MoveOnly, MoveOnly, std::greater<MoveOnly>, V, V>;
+    static_assert(std::is_nothrow_destructible_v<C>);
+  }
+#if defined(_LIBCPP_VERSION)
+  { // checked for libc++ only: comparator with a noexcept(false) destructor
+    using C = std::flat_map<MoveOnly, MoveOnly, ThrowingDtorComp>;
+    static_assert(!std::is_nothrow_destructible_v<C>);
+  }
+#endif // _LIBCPP_VERSION
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/initializer_list.pass.cpp
new file mode 100644
index 000000000000000..7a22746845d002b
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/initializer_list.pass.cpp
@@ -0,0 +1,157 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map(initializer_list<value_type> il, const key_compare& comp = key_compare());
+// template<class Alloc>
+//    flat_map(initializer_list<value_type> il, const Alloc& a);
+// template<class Alloc>
+//    flat_map(initializer_list<value_type> il, const key_compare& comp, const Alloc& a);
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <type_traits>
+#include <vector>
+
+#include "test_macros.h"
+#include "min_allocator.h"
+#include "test_allocator.h"
+
+#include "../../../test_compare.h"
+
+struct DefaultCtableComp { // comparator that records whether its default constructor ran
+  explicit DefaultCtableComp() { default_constructed_ = true; }
+  bool operator()(int, int) const { return false; } // no key is ever "less", so all keys are equivalent
+  bool default_constructed_ = false; // set to true only by the default constructor
+};
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_map<int, int, C, V1, V1>;
+    using M2 = std::flat_map<int, int, C, V1, V2>;
+    using M3 = std::flat_map<int, int, C, V2, V1>;
+    using IL = std::initializer_list<std::pair<int, int>>;
+    static_assert(std::is_constructible_v<M1, IL, const A1&>);
+    static_assert(!std::is_constructible_v<M1, IL, const A2&>);
+    static_assert(!std::is_constructible_v<M2, IL, const A2&>);
+    static_assert(!std::is_constructible_v<M3, IL, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, IL, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, IL, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, IL, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, IL, const C&, const A2&>);
+  }
+
+  {
+    // initializer_list<value_type> needs to match exactly
+    using M = std::flat_map<int, short>;
+    using C = typename M::key_compare;
+    static_assert(std::is_constructible_v<M, std::initializer_list<std::pair<int, short>>>);
+    static_assert(std::is_constructible_v<M, std::initializer_list<std::pair<int, short>>, C>);
+    static_assert(std::is_constructible_v<M, std::initializer_list<std::pair<int, short>>, C, std::allocator<int>>);
+    static_assert(std::is_constructible_v<M, std::initializer_list<std::pair<int, short>>, std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M, std::initializer_list<std::pair<const int, short>>>);
+    static_assert(!std::is_constructible_v<M, std::initializer_list<std::pair<const int, short>>, C>);
+    static_assert(
+        !std::is_constructible_v<M, std::initializer_list<std::pair<const int, short>>, C, std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M, std::initializer_list<std::pair<const int, short>>, std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M, std::initializer_list<std::pair<const int, const short>>>);
+    static_assert(!std::is_constructible_v<M, std::initializer_list<std::pair<const int, const short>>, C>);
+    static_assert(
+        !std::is_constructible_v<M, std::initializer_list<std::pair<const int, const short>>, C, std::allocator<int>>);
+    static_assert(
+        !std::is_constructible_v<M, std::initializer_list<std::pair<const int, const short>>, std::allocator<int>>);
+  }
+
+  std::pair<int, short> expected[] = {{1, 1}, {2, 2}, {3, 3}, {5, 2}}; // unique keys, ascending
+  {
+    // flat_map(initializer_list<value_type>);
+    using M                                         = std::flat_map<int, short>;
+    std::initializer_list<std::pair<int, short>> il = {{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}};
+    M m(il);
+    assert(std::equal(m.begin(), m.end(), expected, expected + 4));
+  }
+  {
+    // flat_map(initializer_list<value_type>);
+    // explicit(false)
+    using M = std::flat_map<int, short>;
+    M m     = {{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}};
+    assert(std::equal(m.begin(), m.end(), expected, expected + 4));
+  }
+  {
+    // flat_map(initializer_list<value_type>);
+    using M = std::flat_map<int, short, std::greater<int>, std::deque<int, min_allocator<int>>>;
+    M m     = {{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}};
+    assert(std::equal(m.rbegin(), m.rend(), expected, expected + 4)); // greater<int>: reversed is ascending
+  }
+  { // containers using explicit_allocator
+    using A = explicit_allocator<int>;
+    {
+      // flat_map(initializer_list<value_type>);
+      // different comparator
+      using M = std::flat_map<int, int, DefaultCtableComp, std::vector<int, A>, std::deque<int, A>>;
+      M m     = {{1, 1}, {2, 2}, {3, 3}};
+      assert(m.size() == 1); // the comparator always returns false, so all keys are equivalent
+      assert(m.begin()->first == m.begin()->second);
+      LIBCPP_ASSERT(*m.begin() == std::make_pair(1, 1));
+      assert(m.key_comp().default_constructed_);
+    }
+    {
+      // flat_map(initializer_list<value_type>, const Allocator&);
+      using M = std::flat_map<int, int, std::greater<int>, std::deque<int, A>, std::vector<int, A>>;
+      A a;
+      M m({{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, a);
+      assert(std::equal(m.rbegin(), m.rend(), expected, expected + 4));
+    }
+  }
+  {
+    // flat_map(initializer_list<value_type>, const key_compare&);
+    using C = test_less<int>;
+    using M = std::flat_map<int, short, C>;
+    auto m  = M({{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, C(10));
+    assert(std::equal(m.begin(), m.end(), expected, expected + 4));
+    assert(m.key_comp() == C(10));
+
+    // explicit(false)
+    M m2 = {{{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, C(10)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(10));
+  }
+  {
+    // flat_map(initializer_list<value_type>, const key_compare&);
+    // Sorting uses the comparator that was passed in
+    using M = std::flat_map<int, short, std::function<bool(int, int)>, std::deque<int, min_allocator<int>>>;
+    auto m  = M({{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, std::greater<int>());
+    assert(std::equal(m.rbegin(), m.rend(), expected, expected + 4));
+    assert(m.key_comp()(2, 1) == true);
+  }
+  {
+    // flat_map(initializer_list<value_type> il, const key_compare& comp, const Alloc& a);
+    using A = explicit_allocator<int>;
+    using M = std::flat_map<int, int, std::greater<int>, std::deque<int, A>, std::vector<int, A>>;
+    A a;
+    M m({{5, 2}, {2, 2}, {2, 2}, {3, 3}, {1, 1}, {3, 3}}, {}, a);
+    assert(std::equal(m.rbegin(), m.rend(), expected, expected + 4));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter.pass.cpp
new file mode 100644
index 000000000000000..7c0c487969943d9
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter.pass.cpp
@@ -0,0 +1,154 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template <class InputIterator>
+//   flat_map(InputIterator first, InputIterator last, const key_compare& comp = key_compare());
+// template<class InputIterator, class Allocator>
+//   flat_map(InputIterator first, InputIterator last, const Allocator& a);
+// template<class InputIterator, class Allocator>
+//   flat_map(InputIterator first, InputIterator last, const key_compare& comp, const Allocator& a);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "min_allocator.h"
+#include "test_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C     = test_less<int>;
+    using A1    = test_allocator<int>;
+    using A2    = other_allocator<int>;
+    using V1    = std::vector<int, A1>;
+    using V2    = std::vector<int, A2>;
+    using M1    = std::flat_map<int, int, C, V1, V1>;
+    using M2    = std::flat_map<int, int, C, V1, V2>;
+    using M3    = std::flat_map<int, int, C, V2, V1>;
+    using Iter1 = typename M1::iterator;
+    using Iter2 = typename M2::iterator;
+    using Iter3 = typename M3::iterator;
+    static_assert(std::is_constructible_v<M1, Iter1, Iter1, const A1&>);
+    static_assert(!std::is_constructible_v<M1, Iter1, Iter1, const A2&>);
+    static_assert(!std::is_constructible_v<M2, Iter2, Iter2, const A2&>);
+    static_assert(!std::is_constructible_v<M3, Iter3, Iter3, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, Iter1, Iter1, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, Iter1, Iter1, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, Iter2, Iter2, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, Iter3, Iter3, const C&, const A2&>);
+  }
+
+  using P      = std::pair<int, short>;
+  P ar[]       = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}, {2, 7}, {3, 8}, {3, 9}};
+  P expected[] = {{1, 1}, {2, 4}, {3, 6}}; // for each key in ar, the first pair with that key
+  {
+    // flat_map(InputIterator , InputIterator)
+    // cpp17_input_iterator
+    using M = std::flat_map<int, short>;
+    auto m  = M(cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 9));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+
+    // explicit(false)
+    M m2 = {cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 9)};
+    assert(m2 == m);
+  }
+  {
+    // flat_map(InputIterator , InputIterator)
+    // greater
+    using M = std::flat_map<int, short, std::greater<int>, std::deque<int, min_allocator<int>>, std::deque<short>>;
+    auto m  = M(cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 9));
+    assert((m.keys() == std::deque<int, min_allocator<int>>{3, 2, 1})); // descending under std::greater
+    LIBCPP_ASSERT((m.values() == std::deque<short>{6, 4, 1}));
+  }
+  {
+    // flat_map(InputIterator , InputIterator)
+    // Test when the operands are of array type (also contiguous iterator type)
+    using M = std::flat_map<int, short, std::greater<int>, std::vector<int, min_allocator<int>>>;
+    auto m  = M(ar, ar); // empty range
+    assert(m.empty());
+  }
+  {
+    // flat_map(InputIterator , InputIterator, const key_compare&)
+    using C = test_less<int>;
+    using M = std::flat_map<int, short, C, std::vector<int>, std::deque<short>>;
+    auto m  = M(ar, ar + 9, C(3));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.key_comp() == C(3));
+
+    // explicit(false)
+    M m2 = {ar, ar + 9, C(3)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(3));
+  }
+  {
+    // flat_map(InputIterator , InputIterator, const Allocator&)
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_map<int, short, std::less<int>, std::vector<int, A1>, std::deque<short, A2>>;
+    auto m   = M(ar, ar + 9, A1(5));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_map(InputIterator , InputIterator, const Allocator&)
+    // explicit(false)
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_map<int, short, std::less<int>, std::vector<int, A1>, std::deque<short, A2>>;
+    M m      = {ar, ar + 9, A1(5)}; // implicit ctor
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_map(InputIterator , InputIterator, const key_compare&, const Allocator&)
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_map<int, short, C, std::vector<int, A1>, std::deque<short, A2>>;
+    auto m   = M(ar, ar + 9, C(3), A1(5));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.key_comp() == C(3));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_map(InputIterator , InputIterator, const key_compare&, const Allocator&)
+    // explicit(false)
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_map<int, short, std::less<int>, std::deque<int, A1>, std::vector<short, A2>>;
+    M m      = {ar, ar + 9, {}, A2(5)}; // implicit ctor
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter_stability.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter_stability.pass.cpp
new file mode 100644
index 000000000000000..1ce859f6c737ea4
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/iter_iter_stability.pass.cpp
@@ -0,0 +1,65 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class InputIterator>
+//   flat_map(InputIterator first, InputIterator last, const key_compare& comp = key_compare())
+//
+// libc++ uses stable_sort to ensure that flat_map's behavior matches map's,
+// in terms of which duplicate items are kept.
+// This tests a conforming extension.
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <flat_map>
+#include <random>
+#include <map>
+
+#include "test_macros.h"
+
+struct Mod256 { // orders two ints by comparing x % 256 with y % 256
+  bool operator()(int x, int y) const { return (x % 256) < (y % 256); }
+};
+
+int main(int, char**) {
+  std::mt19937 randomness; // default seed: the generated pairs are the same on every run
+  std::pair<uint16_t, uint16_t> pairs[200];
+  for (auto& pair : pairs) {
+    pair = {uint16_t(randomness()), uint16_t(randomness())};
+  }
+
+  { // (first, last)
+    std::map<uint16_t, uint16_t, Mod256> m(pairs, pairs + 200);
+    std::flat_map<uint16_t, uint16_t, Mod256> fm(pairs, pairs + 200);
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+  { // (first, last, allocator)
+    std::map<uint16_t, uint16_t, Mod256> m(pairs, pairs + 200, std::allocator<int>());
+    std::flat_map<uint16_t, uint16_t, Mod256> fm(pairs, pairs + 200, std::allocator<int>());
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+  { // (first, last, comparator)
+    std::map<uint16_t, uint16_t, Mod256> m(pairs, pairs + 200, Mod256());
+    std::flat_map<uint16_t, uint16_t, Mod256> fm(pairs, pairs + 200, Mod256());
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+  { // (first, last, comparator, allocator)
+    std::map<uint16_t, uint16_t, Mod256> m(pairs, pairs + 200, Mod256(), std::allocator<int>());
+    std::flat_map<uint16_t, uint16_t, Mod256> fm(pairs, pairs + 200, Mod256(), std::allocator<int>());
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move.pass.cpp
new file mode 100644
index 000000000000000..955d3156064aae5
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move.pass.cpp
@@ -0,0 +1,88 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map(flat_map&&);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "../helpers.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+#include "min_allocator.h"
+
+int main(int, char**) {
+  { // move construction with test_allocator: checks both the new map and the moved-from map
+    using C = test_less<int>;
+    using A = test_allocator<int>;
+    using M = std::flat_map<int, int, C, std::vector<int, A>, std::deque<int, A>>;
+    M mo    = M({{1, 1}, {2, 2}, {3, 1}}, C(5), A(7));
+    M m     = std::move(mo);
+    assert((m == M{{1, 1}, {2, 2}, {3, 1}}));
+    assert(m.key_comp() == C(5));
+    assert(m.keys().get_allocator() == A(7));
+    assert(m.values().get_allocator() == A(7));
+
+    assert(mo.empty());
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys().get_allocator().get_id() == test_alloc_base::moved_value);
+    assert(mo.values().get_allocator().get_id() == test_alloc_base::moved_value);
+  }
+  { // same checks with min_allocator
+    using C = test_less<int>;
+    using A = min_allocator<int>;
+    using M = std::flat_map<int, int, C, std::vector<int, A>, std::deque<int, A>>;
+    M mo    = M({{1, 1}, {2, 2}, {3, 1}}, C(5), A());
+    M m     = std::move(mo);
+    assert((m == M{{1, 1}, {2, 2}, {3, 1}}));
+    assert(m.key_comp() == C(5));
+    assert(m.keys().get_allocator() == A());
+    assert(m.values().get_allocator() == A());
+
+    assert(mo.empty());
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys().get_allocator() == A()); // inspect the moved-from map, not m again
+    assert(mo.values().get_allocator() == A());
+  }
+  {
+    // A moved-from flat_map maintains its class invariant in the presence of moved-from comparators.
+    using M = std::flat_map<int, int, std::function<bool(int, int)>>;
+    M mo    = M({{1, 1}, {2, 2}, {3, 1}}, std::less<int>());
+    M m     = std::move(mo);
+    assert(m.size() == 3);
+    assert(std::is_sorted(m.begin(), m.end(), m.value_comp()));
+    assert(m.key_comp()(1, 2) == true);
+
+    assert(std::is_sorted(mo.begin(), mo.end(), mo.value_comp()));
+    LIBCPP_ASSERT(m.key_comp()(1, 2) == true);
+    LIBCPP_ASSERT(mo.empty());
+    mo.insert({{1, 1}, {2, 2}, {3, 1}}); // insert has no preconditions
+    assert(m == mo);
+  }
+  {
+    // moved-from object maintains invariant if one of the underlying containers does not clear after move
+    using M = std::flat_map<int, int, std::less<>, std::vector<int>, CopyOnlyVector<int>>;
+    M m1    = M({1, 2, 3}, {1, 2, 3});
+    M m2    = std::move(m1);
+    assert(m2.size() == 3);
+    check_invariant(m1);
+    LIBCPP_ASSERT(m1.empty());
+    LIBCPP_ASSERT(m1.keys().size() == 0);
+    LIBCPP_ASSERT(m1.values().size() == 0);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_alloc.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_alloc.pass.cpp
new file mode 100644
index 000000000000000..93a397642252005
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_alloc.pass.cpp
@@ -0,0 +1,82 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map(flat_map&&, const allocator_type&);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <ranges>
+#include <vector>
+
+#include "../helpers.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_map<int, int, C, V1, V1>;
+    using M2 = std::flat_map<int, int, C, V1, V2>;
+    using M3 = std::flat_map<int, int, C, V2, V1>;
+    static_assert(std::is_constructible_v<M1, M1&&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, M1&&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, M2&&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, M3&&, const A2&>);
+  }
+  { // move construction with an allocator (A(3)) different from the source's (A(7))
+    std::pair<int, int> expected[] = {{1, 1}, {2, 2}, {3, 1}};
+    using C                        = test_less<int>;
+    using A                        = test_allocator<int>;
+    using M                        = std::flat_map<int, int, C, std::vector<int, A>, std::deque<int, A>>;
+    auto mo                        = M(expected, expected + 3, C(5), A(7));
+    auto m                         = M(std::move(mo), A(3));
+
+    assert(m.key_comp() == C(5));
+    assert(m.size() == 3);
+    auto [keys, values] = std::move(m).extract();
+    assert(keys.get_allocator() == A(3)); // the new map's containers use the allocator passed in
+    assert(values.get_allocator() == A(3));
+    assert(std::ranges::equal(keys, expected | std::views::elements<0>));
+    assert(std::ranges::equal(values, expected | std::views::elements<1>));
+
+    // The original flat_map is moved-from.
+    assert(std::is_sorted(mo.begin(), mo.end(), mo.value_comp()));
+    assert(mo.empty());
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys().get_allocator() == A(7)); // the source keeps its original allocator
+    assert(mo.values().get_allocator() == A(7));
+  }
+  {
+    // moved-from object maintains invariant if one of the underlying containers does not clear after move
+    using M = std::flat_map<int, int, std::less<>, std::vector<int>, CopyOnlyVector<int>>;
+    M m1    = M({1, 2, 3}, {1, 2, 3});
+    M m2(std::move(m1), std::allocator<int>{});
+    assert(m2.size() == 3);
+    check_invariant(m1);
+    LIBCPP_ASSERT(m1.empty());
+    LIBCPP_ASSERT(m1.keys().size() == 0);
+    LIBCPP_ASSERT(m1.values().size() == 0);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign.pass.cpp
new file mode 100644
index 000000000000000..a94c442c695ddb5
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign.pass.cpp
@@ -0,0 +1,75 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map& operator=(flat_map&&);
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "test_macros.h"
+#include "MoveOnly.h"
+#include "../../../test_compare.h"
+#include "test_allocator.h"
+#include "min_allocator.h"
+
+int main(int, char**) { // move assignment: contents, comparator and allocators of the target after `m = std::move(mo)`
+  { // vector containers; source and target are both built with allocator A1(7)
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<char>;
+    using M  = std::flat_map<int, char, C, std::vector<int, A1>, std::vector<char, A2>>;
+    M mo     = M({{1, 1}, {2, 3}, {3, 2}}, C(5), A1(7));
+    M m      = M({}, C(3), A1(7));
+    m        = std::move(mo);
+    assert((m == M{{1, 1}, {2, 3}, {3, 2}}));
+    assert(m.key_comp() == C(5)); // the comparator comes from the source; the target started with C(3)
+    auto [ks, vs] = std::move(m).extract();
+    assert(ks.get_allocator() == A1(7));
+    assert(vs.get_allocator() == A2(7));
+    LIBCPP_ASSERT(mo.empty()); // an empty moved-from map is libc++ behaviour, not a standard guarantee
+  }
+  { // deque containers with other_allocator; the target starts out larger (4 elements) than the source (2)
+    using C  = test_less<int>;
+    using A1 = other_allocator<int>;
+    using A2 = other_allocator<char>;
+    using M  = std::flat_map<int, char, C, std::deque<int, A1>, std::deque<char, A2>>;
+    M mo     = M({{4, 5}, {5, 4}}, C(5), A1(7));
+    M m      = M({{1, 1}, {2, 2}, {3, 3}, {4, 4}}, C(3), A1(7));
+    m        = std::move(mo);
+    assert((m == M{{4, 5}, {5, 4}}));
+    assert(m.key_comp() == C(5)); // the comparator comes from the source; the target started with C(3)
+    auto [ks, vs] = std::move(m).extract();
+    assert(ks.get_allocator() == A1(7));
+    assert(vs.get_allocator() == A2(7));
+    LIBCPP_ASSERT(mo.empty()); // an empty moved-from map is libc++ behaviour, not a standard guarantee
+  }
+  { // vector containers with min_allocator and a std::greater ordering; the target starts out larger than the source
+    using A = min_allocator<int>;
+    using M = std::flat_map<int, int, std::greater<int>, std::vector<int, A>, std::vector<int, A>>;
+    M mo    = M({{5, 1}, {4, 2}, {3, 3}}, A());
+    M m     = M({{4, 4}, {3, 3}, {2, 2}, {1, 1}}, A());
+    m       = std::move(mo);
+    assert((m == M{{5, 1}, {4, 2}, {3, 3}}));
+    auto [ks, vs] = std::move(m).extract();
+    assert(ks.get_allocator() == A());
+    assert(vs.get_allocator() == A());
+    LIBCPP_ASSERT(mo.empty()); // an empty moved-from map is libc++ behaviour, not a standard guarantee
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_clears.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_clears.pass.cpp
new file mode 100644
index 000000000000000..f28d52dd4e46332
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_clears.pass.cpp
@@ -0,0 +1,104 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map& operator=(flat_map&&);
+// Preserves the class invariant for the moved-from flat_map.
+
+#include <algorithm>
+#include <cassert>
+#include <compare>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "../helpers.h"
+#include "test_macros.h"
+
+struct MoveNegates { // key type for main(): moving from an object flips the sign of the source's value_
+  int value_    = 0;
+  MoveNegates() = default;
+  MoveNegates(int v) : value_(v) {} // non-explicit: main() passes plain ints as keys, e.g. m.contains(1)
+  MoveNegates(MoveNegates&& rhs) : value_(rhs.value_) { rhs.value_ = -rhs.value_; }
+  MoveNegates& operator=(MoveNegates&& rhs) {
+    value_     = rhs.value_;
+    rhs.value_ = -rhs.value_; // same moved-from state as the move constructor: the negated value
+    return *this;
+  }
+  ~MoveNegates()                             = default;
+  auto operator<=>(const MoveNegates&) const = default; // defaulted comparison over the single member value_
+};
+
+struct MoveClears { // key type for main(): moving from an object resets the source's value_ to 0
+  int value_   = 0;
+  MoveClears() = default;
+  MoveClears(int v) : value_(v) {} // non-explicit: main() passes plain ints as keys, e.g. m.contains(1)
+  MoveClears(MoveClears&& rhs) : value_(rhs.value_) { rhs.value_ = 0; }
+  MoveClears& operator=(MoveClears&& rhs) {
+    value_     = rhs.value_;
+    rhs.value_ = 0; // same moved-from state as the move constructor: zero
+    return *this;
+  }
+  ~MoveClears()                             = default;
+  auto operator<=>(const MoveClears&) const = default; // defaulted comparison over the single member value_
+};
+
+int main(int, char**) {
+  auto value_eq = [](auto&& p, auto&& q) { return p.first == q.first; }; // despite the name, compares keys only
+  { // key type MoveNegates: a moved-from key holds the negated value
+    const std::pair<int, int> expected[] = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}};
+    using M = std::flat_map<MoveNegates, int, std::less<MoveNegates>, std::vector<MoveNegates>>;
+    M m     = M(expected, expected + 8);
+    M m2    = M(expected, expected + 3);
+
+    m2 = std::move(m);
+
+    assert(std::equal(m2.begin(), m2.end(), expected, expected + 8));
+    LIBCPP_ASSERT(m.empty());
+    assert(std::is_sorted(m.begin(), m.end(), m.value_comp()));          // still sorted
+    assert(std::adjacent_find(m.begin(), m.end(), value_eq) == m.end()); // still contains no duplicates
+    m.insert({1, 1}); // the moved-from map must remain usable for insertion and lookup
+    m.insert({2, 2});
+    assert(m.contains(1));
+    assert(m.find(2) != m.end());
+  }
+  { // key type MoveClears: a moved-from key holds 0, so element-wise moved-from keys would all compare equal
+    const std::pair<int, int> expected[] = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}};
+    using M = std::flat_map<MoveClears, int, std::less<MoveClears>, std::vector<MoveClears>>;
+    M m     = M(expected, expected + 8);
+    M m2    = M(expected, expected + 3);
+
+    m2 = std::move(m);
+
+    assert(std::equal(m2.begin(), m2.end(), expected, expected + 8));
+    LIBCPP_ASSERT(m.empty());
+    assert(std::is_sorted(m.begin(), m.end(), m.value_comp()));          // still sorted
+    assert(std::adjacent_find(m.begin(), m.end(), value_eq) == m.end()); // still contains no duplicates
+    m.insert({1, 1}); // the moved-from map must remain usable for insertion and lookup
+    m.insert({2, 2});
+    assert(m.contains(1));
+    assert(m.find(2) != m.end());
+  }
+  {
+    // moved-from object maintains invariant if one of underlying container does not clear after move
+    using M = std::flat_map<int, int, std::less<>, std::vector<int>, CopyOnlyVector<int>>;
+    M m1    = M({1, 2, 3}, {1, 2, 3});
+    M m2    = M({1, 2}, {1, 2});
+    m2      = std::move(m1);
+    assert(m2.size() == 3);
+    check_invariant(m1); // helper from ../helpers.h; NOTE(review): presumably checks sortedness and equal sizes
+    LIBCPP_ASSERT(m1.empty());
+    LIBCPP_ASSERT(m1.keys().size() == 0);
+    LIBCPP_ASSERT(m1.values().size() == 0);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_noexcept.pass.cpp
new file mode 100644
index 000000000000000..665b763e6c4f751
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_assign_noexcept.pass.cpp
@@ -0,0 +1,110 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map& operator=(flat_map&& c)
+//     noexcept(
+//          is_nothrow_move_assignable<key_container_type>::value &&
+//          is_nothrow_move_assignable<mapped_container_type>::value &&
+//          is_nothrow_move_assignable<key_compare>::value);
+
+// This tests a conforming extension
+
+#include <flat_map>
+#include <functional>
+#include <memory_resource>
+#include <type_traits>
+#include <vector>
+
+#include "MoveOnly.h"
+#include "test_allocator.h"
+#include "test_macros.h"
+
+struct MoveSensitiveComp { // NOTE(review): not referenced by any test case in this file
+  MoveSensitiveComp() noexcept(false)                         = default;
+  MoveSensitiveComp(const MoveSensitiveComp&) noexcept(false) = default;
+  MoveSensitiveComp(MoveSensitiveComp&& rhs) { rhs.is_moved_from_ = true; } // only flags the source
+  MoveSensitiveComp& operator=(const MoveSensitiveComp&) noexcept = default;
+  MoveSensitiveComp& operator=(MoveSensitiveComp&& rhs) {
+    rhs.is_moved_from_ = true; // only flags the source; this object's own flag is left as it was
+    return *this;
+  }
+  bool operator()(const auto&, const auto&) const { return false; } // never orders one argument before another
+  bool is_moved_from_ = false;
+};
+
+struct MoveThrowsComp { // comparator: copy operations are noexcept, move operations are potentially throwing
+  MoveThrowsComp(MoveThrowsComp&&) noexcept(false);
+  MoveThrowsComp(const MoveThrowsComp&) noexcept(true);
+  MoveThrowsComp& operator=(MoveThrowsComp&&) noexcept(false);
+  MoveThrowsComp& operator=(const MoveThrowsComp&) noexcept(true);
+  bool operator()(const auto&, const auto&) const; // declared only; main() uses the type in type traits only
+};
+
+int main(int, char**) { // compile-time checks only: every case is a static assertion on is_nothrow_move_assignable_v
+  { // default containers and comparator
+    using C = std::flat_map<int, int>;
+    LIBCPP_STATIC_ASSERT(std::is_nothrow_move_assignable_v<C>);
+  }
+  { // test_allocator on both containers, MoveOnly keys: move assignment is not noexcept
+    using C =
+        std::flat_map<MoveOnly,
+                      int,
+                      std::less<MoveOnly>,
+                      std::vector<MoveOnly, test_allocator<MoveOnly>>,
+                      std::vector<int, test_allocator<int>>>;
+    static_assert(!std::is_nothrow_move_assignable_v<C>);
+  }
+  { // test_allocator on both containers, MoveOnly mapped values: move assignment is not noexcept
+    using C =
+        std::flat_map<int,
+                      MoveOnly,
+                      std::less<int>,
+                      std::vector<int, test_allocator<int>>,
+                      std::vector<MoveOnly, test_allocator<MoveOnly>>>;
+    static_assert(!std::is_nothrow_move_assignable_v<C>);
+  }
+  { // other_allocator on both containers, MoveOnly keys: libc++ reports a noexcept move assignment
+    using C =
+        std::flat_map<MoveOnly,
+                      int,
+                      std::less<MoveOnly>,
+                      std::vector<MoveOnly, other_allocator<MoveOnly>>,
+                      std::vector<int, other_allocator<int>>>;
+    LIBCPP_STATIC_ASSERT(std::is_nothrow_move_assignable_v<C>);
+  }
+  { // other_allocator on both containers, MoveOnly mapped values: libc++ reports a noexcept move assignment
+    using C =
+        std::flat_map<int,
+                      MoveOnly,
+                      std::less<int>,
+                      std::vector<int, other_allocator<int>>,
+                      std::vector<MoveOnly, other_allocator<MoveOnly>>>;
+    LIBCPP_STATIC_ASSERT(std::is_nothrow_move_assignable_v<C>);
+  }
+  {
+    // Test with a comparator that throws on move-assignment.
+    using C = std::flat_map<int, int, MoveThrowsComp>;
+    LIBCPP_STATIC_ASSERT(!std::is_nothrow_move_assignable_v<C>);
+  }
+  {
+    // Test with a key container (std::pmr::vector) that throws on move-assignment.
+    using C = std::flat_map<int, int, std::less<int>, std::pmr::vector<int>, std::vector<int>>;
+    static_assert(!std::is_nothrow_move_assignable_v<C>);
+  }
+  {
+    // Test with a mapped container (std::pmr::vector) that throws on move-assignment.
+    using C = std::flat_map<int, int, std::less<int>, std::vector<int>, std::pmr::vector<int>>;
+    static_assert(!std::is_nothrow_move_assignable_v<C>);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_exceptions.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_exceptions.pass.cpp
new file mode 100644
index 000000000000000..cb7e30c2b74fae2
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_exceptions.pass.cpp
@@ -0,0 +1,71 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// UNSUPPORTED: no-exceptions
+
+// <flat_map>
+
+// flat_map(flat_map&& s);
+// If any member function in [flat.map.defn] exits via an exception, the invariant is restored.
+
+#include <algorithm>
+#include <cassert>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "../helpers.h"
+#include "test_macros.h"
+
+static int countdown = 0; // EvilContainer's move constructor throws on the call that decrements this to 0
+
+struct EvilContainer : std::vector<int> { // vector whose move constructor can be armed to throw via `countdown`
+  EvilContainer() = default;
+  EvilContainer(EvilContainer&& rhs) { // no base initializer: the vector base is default-constructed, rhs keeps its data
+    // Throw on move-construction.
+    if (--countdown == 0) {
+      rhs.insert(rhs.end(), 0); // append two zeros to the source before throwing;
+      rhs.insert(rhs.end(), 0); // NOTE(review): presumably to desynchronise it from the sibling container
+      throw 42;
+    }
+  }
+};
+
+int main(int, char**) {
+  { // the key container is the one that throws
+    using M   = std::flat_map<int, int, std::less<int>, EvilContainer, std::vector<int>>;
+    M mo      = {{1, 1}, {2, 2}, {3, 3}};
+    countdown = 1; // arm: the next EvilContainer move construction throws 42
+    try {
+      M m = std::move(mo);
+      assert(false); // not reached
+    } catch (int x) {
+      assert(x == 42);
+    }
+    // The source flat_map maintains its class invariant.
+    check_invariant(mo);
+    LIBCPP_ASSERT(mo.empty());
+  }
+  { // the mapped container is the one that throws
+    using M   = std::flat_map<int, int, std::less<int>, std::vector<int>, EvilContainer>;
+    M mo      = {{1, 1}, {2, 2}, {3, 3}};
+    countdown = 1; // arm: the next EvilContainer move construction throws 42
+    try {
+      M m = std::move(mo);
+      assert(false); // not reached
+    } catch (int x) {
+      assert(x == 42);
+    }
+    // The source flat_map maintains its class invariant.
+    check_invariant(mo);
+    LIBCPP_ASSERT(mo.empty());
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_noexcept.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_noexcept.pass.cpp
new file mode 100644
index 000000000000000..d281dafbcf72dd8
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/move_noexcept.pass.cpp
@@ -0,0 +1,102 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map(flat_map&&)
+//        noexcept(is_nothrow_move_constructible<key_container_type>::value &&
+//                 is_nothrow_move_constructible<mapped_container_type>::value &&
+//                 is_nothrow_move_constructible<key_compare>::value);
+
+// This tests a conforming extension
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+#include "test_macros.h"
+#include "MoveOnly.h"
+#include "test_allocator.h"
+
+template <class T>
+struct ThrowingMoveAllocator { // allocator whose move constructor is declared noexcept(false); copying is defaulted
+  using value_type                                    = T;
+  explicit ThrowingMoveAllocator()                    = default;
+  ThrowingMoveAllocator(const ThrowingMoveAllocator&) = default;
+  ThrowingMoveAllocator(ThrowingMoveAllocator&&) noexcept(false) {}
+  T* allocate(std::ptrdiff_t n) { return std::allocator<T>().allocate(n); } // storage comes from std::allocator<T>
+  void deallocate(T* p, std::ptrdiff_t n) { return std::allocator<T>().deallocate(p, n); }
+  friend bool operator==(ThrowingMoveAllocator, ThrowingMoveAllocator) = default; // stateless: no members to compare
+};
+
+struct ThrowingMoveComp { // comparator: nothrow copy constructor, potentially-throwing move constructor
+  ThrowingMoveComp() = default;
+  ThrowingMoveComp(const ThrowingMoveComp&) noexcept(true) {}
+  ThrowingMoveComp(ThrowingMoveComp&&) noexcept(false) {}
+  bool operator()(const auto&, const auto&) const { return false; } // never orders one argument before another
+};
+
+struct MoveSensitiveComp { // NOTE(review): not referenced by any test case in this file
+  MoveSensitiveComp() noexcept(false)                  = default;
+  MoveSensitiveComp(const MoveSensitiveComp&) noexcept = default;
+  MoveSensitiveComp(MoveSensitiveComp&& rhs) { rhs.is_moved_from_ = true; } // only flags the source
+  MoveSensitiveComp& operator=(const MoveSensitiveComp&) noexcept(false) = default;
+  MoveSensitiveComp& operator=(MoveSensitiveComp&& rhs) {
+    rhs.is_moved_from_ = true; // only flags the source; this object's own flag is left as it was
+    return *this;
+  }
+  bool operator()(const auto&, const auto&) const { return false; } // never orders one argument before another
+  bool is_moved_from_ = false;
+};
+
+int main(int, char**) { // each case checks the trait statically, then performs one move construction
+  { // default containers and comparator
+    using C = std::flat_map<int, int>;
+    LIBCPP_STATIC_ASSERT(std::is_nothrow_move_constructible_v<C>);
+    C c;
+    C d = std::move(c);
+  }
+  { // deque key container using test_allocator
+    using C = std::flat_map<int, int, std::less<int>, std::deque<int, test_allocator<int>>>;
+    LIBCPP_STATIC_ASSERT(std::is_nothrow_move_constructible_v<C>);
+    C c;
+    C d = std::move(c);
+  }
+#if _LIBCPP_VERSION
+  {
+    // Key container fails to be nothrow-move-constructible; this relies on libc++'s support for non-nothrow-movable allocators
+    using C = std::flat_map<int, int, std::less<int>, std::deque<int, ThrowingMoveAllocator<int>>, std::vector<int>>;
+    static_assert(!std::is_nothrow_move_constructible_v<std::deque<int, ThrowingMoveAllocator<int>>>);
+    static_assert(!std::is_nothrow_move_constructible_v<C>);
+    C c;
+    C d = std::move(c);
+  }
+  {
+    // Mapped container fails to be nothrow-move-constructible; this relies on libc++'s support for non-nothrow-movable allocators
+    using C = std::flat_map<int, int, std::less<int>, std::vector<int>, std::deque<int, ThrowingMoveAllocator<int>>>;
+    static_assert(!std::is_nothrow_move_constructible_v<std::deque<int, ThrowingMoveAllocator<int>>>);
+    static_assert(!std::is_nothrow_move_constructible_v<C>);
+    C c;
+    C d = std::move(c);
+  }
+#endif // _LIBCPP_VERSION
+  {
+    // Comparator fails to be nothrow-move-constructible
+    using C = std::flat_map<int, int, ThrowingMoveComp>;
+    static_assert(!std::is_nothrow_move_constructible_v<C>);
+    C c;
+    C d = std::move(c);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/pmr.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/pmr.pass.cpp
new file mode 100644
index 000000000000000..154af11bb9b4db2
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/pmr.pass.cpp
@@ -0,0 +1,361 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// UNSUPPORTED: availability-pmr-missing
+
+// <flat_map>
+
+// Test various constructors with pmr
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <memory_resource>
+#include <ranges>
+#include <vector>
+#include <string>
+
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "test_allocator.h"
+#include "../../../test_compare.h"
+
+int main(int, char**) {
+  {
+    // flat_map(const Allocator& a);
+    using M = std::flat_map<int, short, std::less<int>, std::pmr::vector<int>, std::pmr::vector<short>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::polymorphic_allocator<int> pa = &mr;
+    auto m1                                 = M(pa);
+    assert(m1.empty());
+    assert(m1.keys().get_allocator() == pa);
+    assert(m1.values().get_allocator() == pa);
+    auto m2 = M(&mr);
+    assert(m2.empty());
+    assert(m2.keys().get_allocator() == pa);
+    assert(m2.values().get_allocator() == pa);
+  }
+  {
+    // flat_map(const key_compare& comp, const Alloc& a);
+    using M = std::flat_map<int, int, std::function<bool(int, int)>, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    vm.emplace_back(std::greater<int>());
+    assert(vm[0] == M{});
+    assert(vm[0].key_comp()(2, 1) == true);
+    assert(vm[0].value_comp()({2, 0}, {1, 0}) == true);
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_map(const key_container_type& key_cont, const mapped_container_type& mapped_cont,
+    //          const Allocator& a);
+    using M = std::flat_map<int, int, std::less<int>, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::pmr::vector<int> ks = {1, 1, 1, 2, 2, 3, 2, 3, 3};
+    std::pmr::vector<int> vs = {1, 1, 1, 2, 2, 3, 2, 3, 3};
+    assert(ks.get_allocator().resource() != &mr);
+    assert(vs.get_allocator().resource() != &mr);
+    vm.emplace_back(ks, vs);
+    assert(ks.size() == 9); // ks' value is unchanged, since it was an lvalue above
+    assert(vs.size() == 9); // vs' value is unchanged, since it was an lvalue above
+    assert((vm[0] == M{{1, 1}, {2, 2}, {3, 3}}));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_map(const flat_map&, const allocator_type&);
+    using C = test_less<int>;
+    using M = std::flat_map<int, int, C, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr1;
+    std::pmr::monotonic_buffer_resource mr2;
+    M mo = M({1, 2, 3}, {2, 2, 1}, C(5), &mr1);
+    M m  = {mo, &mr2}; // also test the implicitness of this constructor
+
+    assert(m.key_comp() == C(5));
+    assert((m.keys() == std::pmr::vector<int>{1, 2, 3}));
+    assert((m.values() == std::pmr::vector<int>{2, 2, 1}));
+    assert(m.keys().get_allocator().resource() == &mr2);
+    assert(m.values().get_allocator().resource() == &mr2);
+
+    // mo is unchanged
+    assert(mo.key_comp() == C(5));
+    assert((mo.keys() == std::pmr::vector<int>{1, 2, 3}));
+    assert((mo.values() == std::pmr::vector<int>{2, 2, 1}));
+    assert(mo.keys().get_allocator().resource() == &mr1);
+    assert(mo.values().get_allocator().resource() == &mr1);
+  }
+  {
+    // flat_map(const flat_map&, const allocator_type&);
+    using M = std::flat_map<int, int, std::less<>, std::pmr::vector<int>, std::pmr::deque<int>>;
+    std::pmr::vector<M> vs;
+    M m = {{1, 2}, {2, 2}, {3, 1}};
+    vs.push_back(m);
+    assert(vs[0] == m);
+  }
+  {
+    // flat_map& operator=(const flat_map& m);
+    // pmr allocator is not propagated
+    using M = std::flat_map<int, int, std::less<>, std::pmr::deque<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr1;
+    std::pmr::monotonic_buffer_resource mr2;
+    M mo = M({{1, 1}, {2, 2}, {3, 3}}, &mr1);
+    M m  = M({{4, 4}, {5, 5}}, &mr2);
+    m    = mo;
+    assert((m == M{{1, 1}, {2, 2}, {3, 3}}));
+    assert(m.keys().get_allocator().resource() == &mr2);
+    assert(m.values().get_allocator().resource() == &mr2);
+
+    // mo is unchanged
+    assert((mo == M{{1, 1}, {2, 2}, {3, 3}}));
+    assert(mo.keys().get_allocator().resource() == &mr1);
+  }
+  {
+    // flat_map(const flat_map& m);
+    using C = test_less<int>;
+    std::pmr::monotonic_buffer_resource mr;
+    using M = std::flat_map<int, int, C, std::pmr::vector<int>, std::pmr::vector<int>>;
+    auto mo = M({{1, 1}, {2, 2}, {3, 3}}, C(5), &mr);
+    auto m  = mo;
+
+    assert(m.key_comp() == C(5));
+    assert((m == M{{1, 1}, {2, 2}, {3, 3}}));
+    auto [ks, vs] = std::move(m).extract();
+    assert(ks.get_allocator().resource() == std::pmr::get_default_resource());
+    assert(vs.get_allocator().resource() == std::pmr::get_default_resource());
+
+    // mo is unchanged
+    assert(mo.key_comp() == C(5));
+    assert((mo == M{{1, 1}, {2, 2}, {3, 3}}));
+    auto [kso, vso] = std::move(mo).extract();
+    assert(kso.get_allocator().resource() == &mr);
+    assert(vso.get_allocator().resource() == &mr);
+  }
+  {
+    //  flat_map(initializer_list<value_type> il, const Alloc& a);
+    using M = std::flat_map<int, int, std::less<int>, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::initializer_list<M::value_type> il = {{3, 3}, {1, 1}, {4, 4}, {1, 1}, {5, 5}};
+    vm.emplace_back(il);
+    assert((vm[0] == M{{1, 1}, {3, 3}, {4, 4}, {5, 5}}));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    //  flat_map(initializer_list<value_type> il, const key_compare& comp, const Alloc& a);
+    using C = test_less<int>;
+    using M = std::flat_map<int, int, C, std::pmr::vector<int>, std::pmr::deque<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::initializer_list<M::value_type> il = {{3, 3}, {1, 1}, {4, 4}, {1, 1}, {5, 5}};
+    vm.emplace_back(il, C(5));
+    assert((vm[0] == M{{1, 1}, {3, 3}, {4, 4}, {5, 5}}));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+    assert(vm[0].key_comp() == C(5));
+  }
+  {
+    // flat_map(InputIterator first, InputIterator last, const Allocator& a);
+    using P      = std::pair<int, short>;
+    P ar[]       = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}, {2, 7}, {3, 8}, {3, 9}};
+    P expected[] = {{1, 1}, {2, 4}, {3, 6}};
+    {
+      //  cpp17 iterator
+      using M = std::flat_map<int, short, std::less<int>, std::pmr::vector<int>, std::pmr::vector<short>>;
+      std::pmr::monotonic_buffer_resource mr;
+      std::pmr::vector<M> vm(&mr);
+      vm.emplace_back(cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 9));
+      assert(std::ranges::equal(vm[0].keys(), expected | std::views::elements<0>));
+      LIBCPP_ASSERT(std::ranges::equal(vm[0], expected));
+      assert(vm[0].keys().get_allocator().resource() == &mr);
+      assert(vm[0].values().get_allocator().resource() == &mr);
+    }
+    {
+      using M = std::flat_map<int, short, std::less<int>, std::pmr::vector<int>, std::pmr::vector<short>>;
+      std::pmr::monotonic_buffer_resource mr;
+      std::pmr::vector<M> vm(&mr);
+      vm.emplace_back(ar, ar);
+      assert(vm[0].empty());
+      assert(vm[0].keys().get_allocator().resource() == &mr);
+      assert(vm[0].values().get_allocator().resource() == &mr);
+    }
+  }
+  {
+    // flat_map(flat_map&&, const allocator_type&);
+    std::pair<int, int> expected[] = {{1, 1}, {2, 2}, {3, 1}};
+    using C                        = test_less<int>;
+    using M                        = std::flat_map<int, int, C, std::pmr::vector<int>, std::pmr::deque<int>>;
+    std::pmr::monotonic_buffer_resource mr1;
+    std::pmr::monotonic_buffer_resource mr2;
+    M mo = M({{1, 1}, {3, 1}, {1, 1}, {2, 2}}, C(5), &mr1);
+    M m  = {std::move(mo), &mr2}; // also test the implicitness of this constructor
+
+    assert(m.key_comp() == C(5));
+    assert(m.size() == 3);
+    assert(m.keys().get_allocator().resource() == &mr2);
+    assert(m.values().get_allocator().resource() == &mr2);
+    assert(std::equal(m.begin(), m.end(), expected, expected + 3));
+
+    // The original flat_map is moved-from.
+    assert(std::is_sorted(mo.begin(), mo.end(), mo.value_comp()));
+    assert(mo.key_comp() == C(5));
+    assert(mo.keys().get_allocator().resource() == &mr1);
+    assert(mo.values().get_allocator().resource() == &mr1);
+  }
+  {
+    // flat_map(flat_map&&, const allocator_type&);
+    using M = std::flat_map<int, int, std::less<>, std::pmr::deque<int>, std::pmr::vector<int>>;
+    std::pmr::vector<M> vs;
+    M m = {{1, 1}, {3, 1}, {1, 1}, {2, 2}};
+    vs.push_back(std::move(m));
+    assert((vs[0].keys() == std::pmr::deque<int>{1, 2, 3}));
+    assert((vs[0].values() == std::pmr::vector<int>{1, 2, 1}));
+  }
+  {
+    // flat_map& operator=(flat_map&&);
+    using M =
+        std::flat_map<std::pmr::string, int, std::less<>, std::pmr::vector<std::pmr::string>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr1;
+    std::pmr::monotonic_buffer_resource mr2;
+    M mo = M({{"short", 1},
+              {"very long string that definitely won't fit in the SSO buffer and therefore becomes empty on move", 2}},
+             &mr1);
+    M m  = M({{"don't care", 3}}, &mr2);
+    m    = std::move(mo);
+    assert(m.size() == 2);
+    assert(std::is_sorted(m.begin(), m.end(), m.value_comp()));
+    assert(m.begin()->first.get_allocator().resource() == &mr2);
+
+    assert(std::is_sorted(mo.begin(), mo.end(), mo.value_comp()));
+    mo.insert({"foo", 1});
+    assert(mo.begin()->first.get_allocator().resource() == &mr1);
+  }
+  {
+    //  flat_map(from_range_t, R&&, const Alloc&);
+    using P      = std::pair<int, short>;
+    P ar[]       = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}, {2, 7}, {3, 8}, {3, 9}};
+    P expected[] = {{1, 1}, {2, 4}, {3, 6}};
+    {
+      // input_range
+      using M    = std::flat_map<int, short, std::less<int>, std::pmr::vector<int>, std::pmr::vector<short>>;
+      using Iter = cpp20_input_iterator<const P*>;
+      using Sent = sentinel_wrapper<Iter>;
+      using R    = std::ranges::subrange<Iter, Sent>;
+      std::pmr::monotonic_buffer_resource mr;
+      std::pmr::vector<M> vm(&mr);
+      vm.emplace_back(std::from_range, R(Iter(ar), Sent(Iter(ar + 9))));
+      assert(std::ranges::equal(vm[0].keys(), expected | std::views::elements<0>));
+      LIBCPP_ASSERT(std::ranges::equal(vm[0], expected));
+      assert(vm[0].keys().get_allocator().resource() == &mr);
+      assert(vm[0].values().get_allocator().resource() == &mr);
+    }
+    {
+      using M = std::flat_map<int, short, std::less<int>, std::pmr::vector<int>, std::pmr::vector<short>>;
+      using R = std::ranges::subrange<const P*>;
+      std::pmr::monotonic_buffer_resource mr;
+      std::pmr::vector<M> vm(&mr);
+      vm.emplace_back(std::from_range, R(ar, ar));
+      assert(vm[0].empty());
+      assert(vm[0].keys().get_allocator().resource() == &mr);
+      assert(vm[0].values().get_allocator().resource() == &mr);
+    }
+  }
+  {
+    // flat_map(sorted_unique_t, const key_container_type& key_cont,
+    //          const mapped_container_type& mapped_cont, const Alloc& a);
+    using M = std::flat_map<int, int, std::less<int>, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::pmr::vector<int> ks = {1, 2, 4, 10};
+    std::pmr::vector<int> vs = {4, 3, 2, 1};
+    vm.emplace_back(std::sorted_unique, ks, vs);
+    assert(!ks.empty()); // it was an lvalue above
+    assert(!vs.empty()); // it was an lvalue above
+    assert((vm[0] == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}}));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_map(sorted_unique_t, const key_container_type& key_cont,
+    //          const mapped_container_type& mapped_cont, const Alloc& a);
+    using M = std::flat_map<int, int, std::less<int>, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::pmr::vector<int> ks({1, 2, 4, 10}, &mr);
+    std::pmr::vector<int> vs({4, 3, 2, 1}, &mr);
+    vm.emplace_back(std::sorted_unique, ks, vs);
+    assert((vm[0] == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}}));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_map(sorted_unique_t, InputIterator first, InputIterator last, const key_compare& comp, const Alloc& a);
+    // cpp17 input iterators
+    using C = test_less<int>;
+    using M = std::flat_map<int, int, C, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    using P = std::pair<int, int>;
+    P ar[]  = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    vm.emplace_back(
+        std::sorted_unique, cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 4), C(3));
+    assert((vm[0] == M{{1, 1}, {2, 2}, {4, 4}, {5, 5}}));
+    assert(vm[0].key_comp() == C(3));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_map(sorted_unique_t, InputIterator first, InputIterator last, const key_compare& comp, const Alloc& a);
+    using C = test_less<int>;
+    using M = std::flat_map<int, int, C, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::pair<int, int> ar[1] = {{42, 42}};
+    vm.emplace_back(std::sorted_unique, ar, ar, C(4)); // empty range [ar, ar)
+    assert(vm[0] == M{});
+    assert(vm[0].key_comp() == C(4));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_map(InputIterator first, InputIterator last, const key_compare& comp, const Alloc& a);
+    // cpp17 input iterators over unsorted (duplicate-free) input: this overload has to sort the elements itself
+    using C = test_less<int>;
+    using M = std::flat_map<int, int, C, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    using P = std::pair<int, int>;
+    P ar[]  = {{4, 4}, {1, 1}, {5, 5}, {2, 2}};
+    vm.emplace_back(cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 4), C(3));
+    assert(std::is_sorted(vm[0].begin(), vm[0].end(), vm[0].value_comp()));
+    assert((vm[0] == M{{1, 1}, {2, 2}, {4, 4}, {5, 5}}));
+    assert(vm[0].key_comp() == C(3));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+  {
+    // flat_map(InputIterator first, InputIterator last, const key_compare& comp, const Alloc& a);
+    using C = test_less<int>;
+    using M = std::flat_map<int, int, C, std::pmr::vector<int>, std::pmr::vector<int>>;
+    std::pmr::monotonic_buffer_resource mr;
+    std::pmr::vector<M> vm(&mr);
+    std::pair<int, int> ar[1] = {{42, 42}};
+    vm.emplace_back(ar, ar, C(4)); // empty range [ar, ar), no sorted_unique tag
+    assert(vm[0] == M{});
+    assert(vm[0].key_comp() == C(4));
+    assert(vm[0].keys().get_allocator().resource() == &mr);
+    assert(vm[0].values().get_allocator().resource() == &mr);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/range.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/range.pass.cpp
new file mode 100644
index 000000000000000..282cc71f31994fe
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/range.pass.cpp
@@ -0,0 +1,227 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<container-compatible-range<value_type> R>
+//     flat_map(from_range_t, R&&)
+// template<container-compatible-range<value_type> R>
+//     flat_map(from_range_t, R&&, const key_compare&)
+// template<container-compatible-range<value_type> R, class Alloc>
+//      flat_map(from_range_t, R&&, const Alloc&);
+// template<container-compatible-range<value_type> R, class Alloc>
+//      flat_map(from_range_t, R&&, const key_compare&, const Alloc&);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <string>
+#include <vector>
+
+#include "min_allocator.h"
+#include "test_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+
+// Test the container-compatible-range<value_type> constraint for every from_range_t overload:
+// ranges of pairs convertible to value_type are accepted, ranges of non-pair elements are rejected.
+template <class V>
+using RangeOf = std::ranges::subrange<V*>;
+using Map     = std::flat_map<int, double>;
+// flat_map(from_range_t, R&&)
+static_assert(std::is_constructible_v<Map, std::from_range_t, RangeOf<std::pair<int, double>>>);
+static_assert(std::is_constructible_v<Map, std::from_range_t, RangeOf<std::pair<short, double>>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<double>>);
+// flat_map(from_range_t, R&&, const key_compare&)
+static_assert(std::is_constructible_v<Map, std::from_range_t, RangeOf<std::pair<int, double>>, std::less<int>>);
+static_assert(std::is_constructible_v<Map, std::from_range_t, RangeOf<std::pair<short, double>>, std::less<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<int>, std::less<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<double>, std::less<int>>);
+// flat_map(from_range_t, R&&, const Alloc&)
+static_assert(std::is_constructible_v<Map, std::from_range_t, RangeOf<std::pair<int, double>>, std::allocator<int>>);
+static_assert(std::is_constructible_v<Map, std::from_range_t, RangeOf<std::pair<short, double>>, std::allocator<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<int>, std::allocator<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<double>, std::allocator<int>>);
+// flat_map(from_range_t, R&&, const key_compare&, const Alloc&)
+static_assert(std::is_constructible_v<Map,
+                                      std::from_range_t,
+                                      RangeOf<std::pair<int, double>>,
+                                      std::less<int>,
+                                      std::allocator<int>>);
+static_assert(std::is_constructible_v<Map,
+                                      std::from_range_t,
+                                      RangeOf<std::pair<short, double>>,
+                                      std::less<int>,
+                                      std::allocator<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<int>, std::less<int>, std::allocator<int>>);
+static_assert(!std::is_constructible_v<Map, std::from_range_t, RangeOf<double>, std::less<int>, std::allocator<int>>);
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_map<int, int, C, V1, V1>;
+    using M2 = std::flat_map<int, int, C, V1, V2>;
+    using M3 = std::flat_map<int, int, C, V2, V1>;
+    static_assert(std::is_constructible_v<M1, std::from_range_t, M1, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::from_range_t, M1, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::from_range_t, M2, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::from_range_t, M3, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, std::from_range_t, M1, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::from_range_t, M1, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::from_range_t, M2, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::from_range_t, M3, const C&, const A2&>);
+  }
+  {
+    // container-compatible-range: elements may be std::pair or a two-element std::tuple, but not a plain int
+    using C           = test_less<int>;
+    using A1          = test_allocator<int>;
+    using A2          = test_allocator<std::string>;
+    using M           = std::flat_map<int, std::string, C, std::vector<int, A1>, std::vector<std::string, A2>>;
+    using Pair        = std::pair<int, std::string>;
+    using PairLike    = std::tuple<int, std::string>;
+    using NonPairLike = int;
+
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<Pair>&>);
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<PairLike>&>);
+    static_assert(!std::is_constructible_v<M, std::from_range_t, std::vector<NonPairLike>&>);
+
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<Pair>&, const C&>);
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<PairLike>&, const C&>);
+    static_assert(!std::is_constructible_v<M, std::from_range_t, std::vector<NonPairLike>&, const C&>);
+
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<Pair>&, const A1&>);
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<PairLike>&, const A1&>);
+    static_assert(!std::is_constructible_v<M, std::from_range_t, std::vector<NonPairLike>&, const A1&>);
+
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<Pair>&, const C&, const A1&>);
+    static_assert(std::is_constructible_v<M, std::from_range_t, std::vector<PairLike>&, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M, std::from_range_t, std::vector<NonPairLike>&, const C&, const A1&>);
+  }
+  // Unsorted input with duplicate keys; `expected` holds the first element listed in `ar` for each key.
+  using P      = std::pair<int, short>;
+  P ar[]       = {{1, 1}, {1, 2}, {1, 3}, {2, 4}, {2, 5}, {3, 6}, {2, 7}, {3, 8}, {3, 9}};
+  P expected[] = {{1, 1}, {2, 4}, {3, 6}};
+  {
+    // flat_map(from_range_t, R&&)
+    // input_range && !common (cpp20_input_iterator with a separate sentinel type)
+    using M    = std::flat_map<int, short>;
+    using Iter = cpp20_input_iterator<const P*>;
+    using Sent = sentinel_wrapper<Iter>;
+    using R    = std::ranges::subrange<Iter, Sent>;
+    auto m     = M(std::from_range, R(Iter(ar), Sent(Iter(ar + 9))));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+
+    // explicit(false): copy-list-initialization must compile
+    M m2 = {std::from_range, R(Iter(ar), Sent(Iter(ar + 9)))};
+    assert(m2 == m);
+  }
+  {
+    // flat_map(from_range_t, R&&)
+    // greater: keys end up in descending order, stored in deque containers
+    using M    = std::flat_map<int, short, std::greater<int>, std::deque<int, min_allocator<int>>, std::deque<short>>;
+    using Iter = cpp20_input_iterator<const P*>;
+    using Sent = sentinel_wrapper<Iter>;
+    using R    = std::ranges::subrange<Iter, Sent>;
+    auto m     = M(std::from_range, R(Iter(ar), Sent(Iter(ar + 9))));
+    assert((m.keys() == std::deque<int, min_allocator<int>>{3, 2, 1}));
+    LIBCPP_ASSERT((m.values() == std::deque<short>{6, 4, 1}));
+  }
+  {
+    // flat_map(from_range_t, R&&)
+    // contiguous range (subrange over raw pointers)
+    using M = std::flat_map<int, short>;
+    using R = std::ranges::subrange<const P*>;
+    auto m  = M(std::from_range, R(ar, ar + 9));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+  }
+  {
+    // flat_map(from_range_t, R&&, const key_compare&)
+    using C = test_less<int>;
+    using M = std::flat_map<int, short, C, std::vector<int>, std::deque<short>>;
+    using R = std::ranges::subrange<const P*>;
+    auto m  = M(std::from_range, R(ar, ar + 9), C(3));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.key_comp() == C(3));
+
+    // explicit(false): copy-list-initialization must compile
+    M m2 = {std::from_range, R(ar, ar + 9), C(3)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(3));
+  }
+  {
+    // flat_map(from_range_t, R&&, const Allocator&)
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_map<int, short, std::less<int>, std::vector<int, A1>, std::deque<short, A2>>;
+    using R  = std::ranges::subrange<const P*>;
+    auto m   = M(std::from_range, R(ar, ar + 9), A1(5));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_map(from_range_t, R&&, const Allocator&)
+    // explicit(false): copy-list-initialization must compile
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_map<int, short, std::less<int>, std::vector<int, A1>, std::deque<short, A2>>;
+    using R  = std::ranges::subrange<const P*>;
+    M m      = {std::from_range, R(ar, ar + 9), A1(5)}; // implicit ctor
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_map(from_range_t, R&&, const key_compare&, const Allocator&)
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_map<int, short, C, std::vector<int, A1>, std::deque<short, A2>>;
+    using R  = std::ranges::subrange<const P*>;
+    auto m   = M(std::from_range, R(ar, ar + 9), C(3), A1(5));
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.key_comp() == C(3));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_map(from_range_t, R&&, const key_compare&, const Allocator&)
+    // explicit(false): comparator given as {}; the single allocator argument A2(5) reaches both containers
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_map<int, short, std::less<int>, std::deque<int, A1>, std::vector<short, A2>>;
+    using R  = std::ranges::subrange<const P*>;
+    M m      = {std::from_range, R(ar, ar + 9), {}, A2(5)}; // implicit ctor
+    assert(std::ranges::equal(m.keys(), expected | std::views::elements<0>));
+    LIBCPP_ASSERT(std::ranges::equal(m, expected));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_container.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_container.pass.cpp
new file mode 100644
index 000000000000000..3c8868f2ff4247d
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_container.pass.cpp
@@ -0,0 +1,165 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map(sorted_unique_t, key_container_type key_cont, mapped_container_type mapped_cont,
+//          const key_compare& comp = key_compare());
+//
+// template<class Alloc>
+//   flat_map(sorted_unique_t, const key_container_type& key_cont,
+//            const mapped_container_type& mapped_cont, const Alloc& a);
+// template<class Alloc>
+//   flat_map(sorted_unique_t, const key_container_type& key_cont,
+//            const mapped_container_type& mapped_cont,
+//            const key_compare& comp, const Alloc& a);
+
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "min_allocator.h"
+#include "MoveOnly.h"
+#include "test_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_map<int, int, C, V1, V1>;
+    using M2 = std::flat_map<int, int, C, V1, V2>;
+    using M3 = std::flat_map<int, int, C, V2, V1>;
+    static_assert(std::is_constructible_v<M1, std::sorted_unique_t, const V1&, const V1&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::sorted_unique_t, const V1&, const V1&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::sorted_unique_t, const V1&, const V2&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::sorted_unique_t, const V2&, const V1&, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, std::sorted_unique_t, const V1&, const V1&, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::sorted_unique_t, const V1&, const V1&, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::sorted_unique_t, const V1&, const V2&, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::sorted_unique_t, const V2&, const V1&, const C&, const A2&>);
+  }
+  {
+    // flat_map(sorted_unique_t, key_container_type, mapped_container_type)
+    using M              = std::flat_map<int, char>;
+    std::vector<int> ks  = {1, 2, 4, 10};
+    std::vector<char> vs = {4, 3, 2, 1};
+    auto ks2             = ks;
+    auto vs2             = vs;
+
+    auto m = M(std::sorted_unique, ks, vs);
+    assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}}));
+    m = M(std::sorted_unique, std::move(ks), std::move(vs));
+    assert(ks.empty()); // it was moved-from
+    assert(vs.empty()); // it was moved-from
+    assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}}));
+
+    // explicit(false): copy-list-initialization must compile
+    M m2 = {std::sorted_unique, std::move(ks2), std::move(vs2)};
+    assert(m == m2);
+  }
+  {
+    // flat_map(sorted_unique_t, key_container_type, mapped_container_type)
+    // non-default container, comparator and allocator type
+    using Ks = std::deque<int, min_allocator<int>>;
+    using Vs = std::deque<char, min_allocator<char>>;
+    using M  = std::flat_map<int, char, std::greater<int>, Ks, Vs>;
+    Ks ks    = {10, 4, 2, 1};
+    Vs vs    = {1, 2, 3, 4};
+    auto m   = M(std::sorted_unique, ks, vs);
+    assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}}));
+    m = M(std::sorted_unique, std::move(ks), std::move(vs));
+    assert(ks.empty()); // it was moved-from
+    assert(vs.empty()); // it was moved-from
+    assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}}));
+  }
+  {
+    // flat_map(sorted_unique_t, key_container_type, mapped_container_type)
+    // moved-in containers keep their own allocators: A(4) for keys, A(5) for values
+    using A = test_allocator<int>;
+    using M = std::flat_map<int, int, std::less<int>, std::vector<int, A>, std::deque<int, A>>;
+    auto ks = std::vector<int, A>({1, 2, 4, 10}, A(4));
+    auto vs = std::deque<int, A>({4, 3, 2, 1}, A(5));
+    auto m  = M(std::sorted_unique, std::move(ks), std::move(vs));
+    assert(ks.empty()); // it was moved-from
+    assert(vs.empty()); // it was moved-from
+    assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}}));
+    assert(m.keys().get_allocator() == A(4));
+    assert(m.values().get_allocator() == A(5));
+  }
+  {
+    // flat_map(sorted_unique_t, key_container_type, mapped_container_type, key_compare)
+    using C              = test_less<int>;
+    using M              = std::flat_map<int, char, C>;
+    std::vector<int> ks  = {1, 2, 4, 10};
+    std::vector<char> vs = {4, 3, 2, 1};
+
+    auto m = M(std::sorted_unique, ks, vs, C(4));
+    assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}}));
+    assert(m.key_comp() == C(4));
+
+    // explicit(false): copy-list-initialization must compile
+    M m2 = {std::sorted_unique, ks, vs, C(4)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(4));
+  }
+  {
+    // flat_map(sorted_unique_t, key_container_type, mapped_container_type, key_compare, const Allocator&)
+    using C                = test_less<int>;
+    using A                = test_allocator<int>;
+    using M                = std::flat_map<int, int, C, std::vector<int, A>, std::vector<int, A>>;
+    std::vector<int, A> ks = {1, 2, 4, 10};
+    std::vector<int, A> vs = {4, 3, 2, 1};
+    auto m                 = M(std::sorted_unique, ks, vs, C(4), A(5));
+    assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}}));
+    assert(m.key_comp() == C(4));
+    assert(m.keys().get_allocator() == A(5));
+    assert(m.values().get_allocator() == A(5));
+
+    // explicit(false): pass std::sorted_unique so that this overload, not the unsorted one, is selected
+    M m2 = {std::sorted_unique, ks, vs, C(4), A(5)};
+    assert(m2 == m);
+    assert(m2.key_comp() == C(4));
+    assert(m2.keys().get_allocator() == A(5));
+    assert(m2.values().get_allocator() == A(5));
+  }
+  {
+    // flat_map(sorted_unique_t, key_container_type, mapped_container_type, const Allocator&)
+    using A = test_allocator<int>;
+    using M = std::flat_map<int, int, std::less<int>, std::vector<int, A>, std::deque<int, A>>;
+    auto ks = std::vector<int, A>({1, 2, 4, 10}, A(4));
+    auto vs = std::deque<int, A>({4, 3, 2, 1}, A(5));
+    auto m  = M(std::sorted_unique, ks, vs, A(6)); // replaces the allocators
+    assert(!ks.empty());                           // it was an lvalue above
+    assert(!vs.empty());                           // it was an lvalue above
+    assert((m == M{{1, 4}, {2, 3}, {4, 2}, {10, 1}}));
+    assert(m.keys().get_allocator() == A(6));
+    assert(m.values().get_allocator() == A(6));
+
+    // explicit(false): copy-list-initialization must compile
+    M m2 = {std::sorted_unique, ks, vs, A(6)};
+    assert(m2 == m);
+    assert(m2.keys().get_allocator() == A(6));
+    assert(m2.values().get_allocator() == A(6));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_initializer_list.pass.cpp
new file mode 100644
index 000000000000000..26452472ba20112
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_initializer_list.pass.cpp
@@ -0,0 +1,179 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map(sorted_unique_t s,
+//          initializer_list<value_type> il,
+//          const key_compare& comp = key_compare())
+// template<class Alloc>
+//   flat_map(sorted_unique_t, initializer_list<value_type> il, const Alloc& a);
+// template<class Alloc>
+//   flat_map(sorted_unique_t, initializer_list<value_type> il,
+//            const key_compare& comp, const Alloc& a);
+
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "min_allocator.h"
+#include "test_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+
+template <class Key, class Mapped>
+std::initializer_list<std::pair<Key, Mapped>> il = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+// Concrete instantiations of the sorted, duplicate-free list for the element types used in main().
+const std::initializer_list<std::pair<int, int>> il1   = il<int, int>;
+const std::initializer_list<std::pair<int, short>> il2 = il<int, short>;
+const std::initializer_list<std::pair<short, int>> il3 = il<short, int>;
+
+int main(int, char**) {
+  {
+    // The constructors in this subclause shall not participate in overload
+    // resolution unless uses_allocator_v<key_container_type, Alloc> is true
+    // and uses_allocator_v<mapped_container_type, Alloc> is true.
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = other_allocator<int>;
+    using V1 = std::vector<int, A1>;
+    using V2 = std::vector<int, A2>;
+    using M1 = std::flat_map<int, int, C, V1, V1>;
+    using M2 = std::flat_map<int, int, C, V1, V2>;
+    using M3 = std::flat_map<int, int, C, V2, V1>;
+    using IL = std::initializer_list<std::pair<int, int>>;
+    static_assert(std::is_constructible_v<M1, std::sorted_unique_t, IL, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::sorted_unique_t, IL, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::sorted_unique_t, IL, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::sorted_unique_t, IL, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, std::sorted_unique_t, IL, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::sorted_unique_t, IL, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::sorted_unique_t, IL, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::sorted_unique_t, IL, const C&, const A2&>);
+  }
+  {
+    // initializer_list<value_type> needs to match exactly: lists of pair<const int, ...> are rejected
+    using M = std::flat_map<int, short>;
+    using C = typename M::key_compare;
+    static_assert(std::is_constructible_v<M, std::sorted_unique_t, std::initializer_list<std::pair<int, short>>>);
+    static_assert(std::is_constructible_v<M, std::sorted_unique_t, std::initializer_list<std::pair<int, short>>, C>);
+    static_assert(std::is_constructible_v<M,
+                                          std::sorted_unique_t,
+                                          std::initializer_list<std::pair<int, short>>,
+                                          C,
+                                          std::allocator<int>>);
+    static_assert(std::is_constructible_v<M,
+                                          std::sorted_unique_t,
+                                          std::initializer_list<std::pair<int, short>>,
+                                          std::allocator<int>>);
+    static_assert(
+        !std::is_constructible_v<M, std::sorted_unique_t, std::initializer_list<std::pair<const int, short>>>);
+    static_assert(
+        !std::is_constructible_v<M, std::sorted_unique_t, std::initializer_list<std::pair<const int, short>>, C>);
+    static_assert(!std::is_constructible_v<M,
+                                           std::sorted_unique_t,
+                                           std::initializer_list<std::pair<const int, short>>,
+                                           C,
+                                           std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M,
+                                           std::sorted_unique_t,
+                                           std::initializer_list<std::pair<const int, short>>,
+                                           std::allocator<int>>);
+    static_assert(
+        !std::is_constructible_v<M, std::sorted_unique_t, std::initializer_list<std::pair<const int, const short>>>);
+    static_assert(
+        !std::is_constructible_v<M, std::sorted_unique_t, std::initializer_list<std::pair<const int, const short>>, C>);
+    static_assert(!std::is_constructible_v<M,
+                                           std::sorted_unique_t,
+                                           std::initializer_list<std::pair<const int, const short>>,
+                                           C,
+                                           std::allocator<int>>);
+    static_assert(!std::is_constructible_v<M,
+                                           std::sorted_unique_t,
+                                           std::initializer_list<std::pair<const int, const short>>,
+                                           std::allocator<int>>);
+  }
+
+  {
+    // flat_map(sorted_unique_t, initializer_list<value_type>);
+    using M       = std::flat_map<int, int>;
+    auto m        = M(std::sorted_unique, il1);
+    auto expected = M{{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    assert(m == expected);
+
+    // explicit(false): copy-list-initialization must compile
+    M m2 = {std::sorted_unique, il1};
+    assert(m2 == m);
+  }
+  {
+    // flat_map(sorted_unique_t, initializer_list<value_type>, const key_compare&);
+    using M = std::flat_map<int, int, std::function<bool(int, int)>>;
+    auto m  = M(std::sorted_unique, il1, std::less<int>());
+    assert(m == M({{1, 1}, {2, 2}, {4, 4}, {5, 5}}, std::less<>()));
+    assert(m.key_comp()(1, 2) == true);
+
+    // explicit(false): copy-list-initialization must compile
+    M m2 = {std::sorted_unique, il1, std::less<int>()};
+    assert(m2 == m);
+  }
+  {
+    // flat_map(sorted_unique_t, initializer_list<value_type>, const key_compare&);
+    // greater: the input list is given in descending key order
+    using M = std::flat_map<int, int, std::greater<int>, std::deque<int, min_allocator<int>>, std::vector<int>>;
+    std::initializer_list<std::pair<int, int>> il4{{5, 5}, {4, 4}, {2, 2}, {1, 1}};
+    auto m = M(std::sorted_unique, il4, std::greater<int>());
+    assert((m == M{{5, 5}, {4, 4}, {2, 2}, {1, 1}}));
+  }
+  {
+    // flat_map(sorted_unique_t, initializer_list<value_type>, const Allocator&)
+    using A1      = test_allocator<int>;
+    using A2      = test_allocator<short>;
+    using M       = std::flat_map<int, short, std::less<int>, std::vector<int, A1>, std::deque<short, A2>>;
+    auto m        = M(std::sorted_unique, il2, A1(5));
+    auto expected = M{{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    assert(m == expected);
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+
+    // explicit(false): copy-list-initialization must compile
+    M m2 = {std::sorted_unique, il2, A1(5)};
+    assert(m2 == m);
+    assert(m2.keys().get_allocator() == A1(5));
+    assert(m2.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_map(sorted_unique_t, initializer_list<value_type>, const key_compare&, const Allocator&);
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_map<int, short, C, std::vector<int, A1>, std::deque<short, A2>>;
+    auto m   = M(std::sorted_unique, il2, C(3), A1(5));
+    assert((m == M{{1, 1}, {2, 2}, {4, 4}, {5, 5}}));
+    assert(m.key_comp() == C(3));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_map(sorted_unique_t, initializer_list<value_type>, const key_compare&, const Allocator&);
+    // explicit(false): comparator given as {}; the single allocator argument A1(5) reaches both containers
+    using A1 = test_allocator<short>;
+    using A2 = test_allocator<int>;
+    using M  = std::flat_map<short, int, std::less<int>, std::deque<short, A1>, std::vector<int, A2>>;
+    M m      = {std::sorted_unique, il3, {}, A1(5)}; // implicit ctor
+    assert((m == M{{1, 1}, {2, 2}, {4, 4}, {5, 5}}));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_iter_iter.pass.cpp
new file mode 100644
index 000000000000000..8eb7547e917cca0
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.cons/sorted_iter_iter.pass.cpp
@@ -0,0 +1,171 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template <class InputIterator>
+//   flat_map(sorted_unique_t, InputIterator first, InputIterator last, const key_compare& comp = key_compare());
+// template<class InputIterator, class Alloc>
+//   flat_map(sorted_unique_t, InputIterator first, InputIterator last, const Alloc& a);
+// template<class InputIterator, class Allocator>
+//   flat_map(sorted_unique_t, InputIterator first, InputIterator last, const key_compare& comp, const Allocator& a);
+
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "min_allocator.h"
+#include "test_allocator.h"
+#include "test_iterators.h"
+#include "test_macros.h"
+#include "../../../test_compare.h"
+
+int main(int, char**) { // Each scope below covers one aspect of the sorted_unique_t iterator-pair constructors.
+  {
+    // The constructors in this subclause shall not participate in overload resolution unless
+    // uses_allocator_v<key_container_type, Alloc> and uses_allocator_v<mapped_container_type, Alloc> are both true.
+    // M1 uses A1 for both containers, M2 only for the keys, M3 only for the values; none is constructible from A2.
+    using C     = test_less<int>;
+    using A1    = test_allocator<int>;
+    using A2    = other_allocator<int>;
+    using V1    = std::vector<int, A1>;
+    using V2    = std::vector<int, A2>;
+    using M1    = std::flat_map<int, int, C, V1, V1>;
+    using M2    = std::flat_map<int, int, C, V1, V2>;
+    using M3    = std::flat_map<int, int, C, V2, V1>;
+    using Iter1 = typename M1::iterator;
+    using Iter2 = typename M2::iterator;
+    using Iter3 = typename M3::iterator;
+    static_assert(std::is_constructible_v<M1, std::sorted_unique_t, Iter1, Iter1, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::sorted_unique_t, Iter1, Iter1, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::sorted_unique_t, Iter2, Iter2, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::sorted_unique_t, Iter3, Iter3, const A2&>);
+
+    static_assert(std::is_constructible_v<M1, std::sorted_unique_t, Iter1, Iter1, const C&, const A1&>);
+    static_assert(!std::is_constructible_v<M1, std::sorted_unique_t, Iter1, Iter1, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M2, std::sorted_unique_t, Iter2, Iter2, const C&, const A2&>);
+    static_assert(!std::is_constructible_v<M3, std::sorted_unique_t, Iter3, Iter3, const C&, const A2&>);
+  }
+  {
+    // flat_map(sorted_unique_t, InputIterator, InputIterator);
+    // cpp17_input_iterator
+    using M       = std::flat_map<int, int>;
+    using P       = std::pair<int, int>;
+    P ar[]        = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    auto m        = M(std::sorted_unique, cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 4));
+    auto expected = M{{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    assert(m == expected);
+
+    // explicit(false): copy-list-initialization must also select this constructor
+    M m2 = {std::sorted_unique, cpp17_input_iterator<const P*>(ar), cpp17_input_iterator<const P*>(ar + 4)};
+    assert(m2 == m);
+  }
+  {
+    // flat_map(sorted_unique_t, InputIterator, InputIterator);
+    // contiguous iterator (raw pointers into an array)
+    using C = test_less<int>;
+    using M = std::flat_map<int, int, C, std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>;
+    std::pair<int, int> ar[] = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    auto m                   = M(std::sorted_unique, ar, ar + 4);
+    auto expected            = M{{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    assert(m == expected);
+  }
+  {
+    // flat_map(sorted_unique_t, InputIterator, InputIterator, const key_compare&);
+    // cpp17_input_iterator
+    using M = std::flat_map<int, int, std::function<bool(int, int)>>;
+    using P = std::pair<int, int>;
+    P ar[]  = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    auto m  = M(std::sorted_unique,
+               cpp17_input_iterator<const P*>(ar),
+               cpp17_input_iterator<const P*>(ar + 4),
+               std::less<int>());
+    assert(m == M({{1, 1}, {2, 2}, {4, 4}, {5, 5}}, std::less<>()));
+    assert(m.key_comp()(1, 2) == true); // the comparator was stored (an empty std::function would throw when called)
+
+    // explicit(false): copy-list-initialization must also select this constructor
+    M m2 = {std::sorted_unique,
+            cpp17_input_iterator<const P*>(ar),
+            cpp17_input_iterator<const P*>(ar + 4),
+            std::less<int>()};
+    assert(m2 == m);
+  }
+  {
+    // flat_map(sorted_unique_t, InputIterator, InputIterator, const key_compare&);
+    // greater
+    using M = std::flat_map<int, int, std::greater<int>, std::deque<int, min_allocator<int>>, std::vector<int>>;
+    using P = std::pair<int, int>;
+    P ar[]  = {{5, 5}, {4, 4}, {2, 2}, {1, 1}}; // descending keys, i.e. already sorted w.r.t. std::greater
+    auto m  = M(std::sorted_unique,
+               cpp17_input_iterator<const P*>(ar),
+               cpp17_input_iterator<const P*>(ar + 4),
+               std::greater<int>());
+    assert((m == M{{5, 5}, {4, 4}, {2, 2}, {1, 1}}));
+  }
+  {
+    // flat_map(sorted_unique_t, InputIterator, InputIterator, const key_compare&);
+    // contiguous iterator
+    using C = test_less<int>;
+    using M = std::flat_map<int, int, C, std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>;
+    std::pair<int, int> ar[1] = {{42, 42}};
+    auto m                    = M(std::sorted_unique, ar, ar, C(5)); // [ar, ar) is an empty range
+    assert(m.empty());
+    assert(m.key_comp() == C(5));
+  }
+  {
+    // flat_map(sorted_unique_t, InputIterator, InputIterator, const Allocator&);
+    using A1      = test_allocator<int>;
+    using A2      = test_allocator<short>;
+    using M       = std::flat_map<int, short, std::less<int>, std::vector<int, A1>, std::deque<short, A2>>;
+    using P       = std::pair<int, int>; // pair<int, int> source; M's mapped_type is short
+    P ar[]        = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    auto m        = M(std::sorted_unique, ar, ar + 4, A1(5));
+    auto expected = M{{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    assert(m == expected);
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+
+    // explicit(false): copy-list-initialization must also select this constructor
+    M m2 = {std::sorted_unique, ar, ar + 4, A1(5)};
+    assert(m2 == m);
+    assert(m2.keys().get_allocator() == A1(5));
+    assert(m2.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_map(sorted_unique_t, InputIterator, InputIterator, const key_compare&, const Allocator&);
+    using C  = test_less<int>;
+    using A1 = test_allocator<int>;
+    using A2 = test_allocator<short>;
+    using M  = std::flat_map<int, short, C, std::vector<int, A1>, std::deque<short, A2>>;
+    using P  = std::pair<int, int>; // pair<int, int> source; M's mapped_type is short
+    P ar[]   = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    auto m   = M(std::sorted_unique, ar, ar + 4, C(3), A1(5));
+    assert((m == M{{1, 1}, {2, 2}, {4, 4}, {5, 5}}));
+    assert(m.key_comp() == C(3));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+  {
+    // flat_map(sorted_unique_t, InputIterator, InputIterator, const key_compare&, const Allocator&);
+    // explicit(false)
+    using A1 = test_allocator<short>;
+    using A2 = test_allocator<int>;
+    using M  = std::flat_map<short, int, std::less<int>, std::deque<short, A1>, std::vector<int, A2>>;
+    using P  = std::pair<int, int>; // pair<int, int> source; M's key_type is short
+    P ar[]   = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    M m      = {std::sorted_unique, ar, ar + 4, {}, A1(5)}; // implicit ctor; {} yields a default-constructed comparator
+    assert((m == M{{1, 1}, {2, 2}, {4, 4}, {5, 5}}));
+    assert(m.keys().get_allocator() == A1(5));
+    assert(m.values().get_allocator() == A2(5));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if.pass.cpp
new file mode 100644
index 000000000000000..fb0563eec5376b8
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if.pass.cpp
@@ -0,0 +1,93 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class Key, class T, class Compare, class KeyContainer, class MappedContainer, class Predicate>
+//   typename flat_map<Key, T, Compare, KeyContainer, MappedContainer>::size_type
+//   erase_if(flat_map<Key, T, Compare, KeyContainer, MappedContainer>& c, Predicate pred);
+
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <initializer_list>
+#include <vector>
+
+#include "test_macros.h"
+#include "test_allocator.h"
+#include "min_allocator.h"
+
+// Verify that `flat_map` (like `map`) does NOT support std::erase.
+// HasStdErase<S> holds iff std::erase(s, x) compiles for an S& and an S::value_type; vector is the positive control.
+template <class S>
+concept HasStdErase = requires(S& s, typename S::value_type x) { std::erase(s, x); };
+static_assert(HasStdErase<std::vector<int>>);
+static_assert(!HasStdErase<std::flat_map<int, int>>);
+
+template <class M>
+M make(std::initializer_list<int> vals) { // Builds an M that maps each v in vals to v + 10.
+  M ret;
+  for (int v : vals)
+    ret[static_cast<typename M::key_type>(v)] = static_cast<typename M::mapped_type>(v + 10);
+  return ret;
+}
+
+template <class M, class Pred>
+void test0( // Runs std::erase_if(p) on make<M>(vals), then checks the returned count and the surviving elements.
+    std::initializer_list<int> vals, Pred p, std::initializer_list<int> expected, std::size_t expected_erased_count) {
+  M s = make<M>(vals);
+  ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p))); // decltype is unevaluated: s is untouched
+  assert(expected_erased_count == std::erase_if(s, p));
+  assert(s == make<M>(expected));
+}
+
+template <class S>
+void test() { // Runs erase_if on maps of 0 to 3 elements built by make<S>, using predicates of differing signatures.
+  // Test all the plausible signatures for this predicate.
+  auto is1   = [](typename S::const_reference v) { return v.first == 1; };  // takes const_reference
+  auto is2   = [](typename S::value_type v) { return v.first == 2; };       // takes value_type by value
+  auto is3   = [](const typename S::value_type& v) { return v.first == 3; }; // takes const value_type&
+  auto is4   = [](auto v) { return v.first == 4; };                         // generic, by value
+  auto True  = [](const auto&) { return true; };                            // erases every element
+  auto False = [](auto&&) { return false; };                                // erases nothing
+
+  test0<S>({}, is1, {}, 0);
+
+  test0<S>({1}, is1, {}, 1);
+  test0<S>({1}, is2, {1}, 0);
+
+  test0<S>({1, 2}, is1, {2}, 1);
+  test0<S>({1, 2}, is2, {1}, 1);
+  test0<S>({1, 2}, is3, {1, 2}, 0);
+
+  test0<S>({1, 2, 3}, is1, {2, 3}, 1);
+  test0<S>({1, 2, 3}, is2, {1, 3}, 1);
+  test0<S>({1, 2, 3}, is3, {1, 2}, 1);
+  test0<S>({1, 2, 3}, is4, {1, 2, 3}, 0);
+
+  test0<S>({1, 2, 3}, True, {}, 3);
+  test0<S>({1, 2, 3}, False, {1, 2, 3}, 0);
+}
+
+int main(int, char**) { // Runs test() over several key types, comparators, key containers and allocators.
+  test<std::flat_map<int, char>>();
+  test<std::flat_map<int,
+                     char,
+                     std::less<int>,
+                     std::vector<int, min_allocator<int>>,
+                     std::vector<char, min_allocator<char>>>>();
+  test<std::flat_map<int, char, std::greater<int>, std::vector<int, test_allocator<int>>>>();
+  test<std::flat_map<int, char, std::less<int>, std::deque<int, min_allocator<int>>>>();
+  test<std::flat_map<int, char, std::greater<int>, std::deque<int, test_allocator<int>>>>();
+  test<std::flat_map<long, int>>();
+  test<std::flat_map<double, int>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if_exceptions.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if_exceptions.pass.cpp
new file mode 100644
index 000000000000000..48fdec42db3fcba
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.erasure/erase_if_exceptions.pass.cpp
@@ -0,0 +1,155 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// UNSUPPORTED: no-exceptions
+
+// <flat_map>
+
+// template<class Key, class T, class Compare, class KeyContainer, class MappedContainer, class Predicate>
+//   typename flat_map<Key, T, Compare, KeyContainer, MappedContainer>::size_type
+//   erase_if(flat_map<Key, T, Compare, KeyContainer, MappedContainer>& c, Predicate pred);
+// If any member function in [flat.map.defn] exits via an exception, the invariant is restored.
+// (This is not a member function, but let's respect the invariant anyway.)
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "../helpers.h"
+#include "test_macros.h"
+
+struct Counter { // Countdown that makes tick() throw at chosen points.
+  int c1, c2, throws; // c1: ticks left until the next throw; c2: value reloaded into c1 after a throw; throws: count
+  void tick() {
+    c1 -= 1;
+    if (c1 == 0) {
+      c1 = c2; // re-arm for the following throw
+      throws += 1;
+      throw 42; // the tests catch this as an int
+    }
+  }
+};
+Counter g_counter = {0, 0, 0}; // starting from c1 == 0 the countdown goes negative, so tick() does not throw
+
+struct ThrowingAssignment { // int wrapper whose copy assignment ticks g_counter and may therefore throw.
+  ThrowingAssignment(int i) : i_(i) {}
+  ThrowingAssignment(const ThrowingAssignment&) = default; // copy construction never ticks the counter
+  ThrowingAssignment& operator=(const ThrowingAssignment& rhs) {
+    g_counter.tick(); // possible throw before the value is copied
+    i_ = rhs.i_;
+    g_counter.tick(); // possible throw after the value is copied
+    return *this;
+  }
+  operator int() const { return i_; }
+  int i_;
+};
+
+struct ThrowingComparator { // Less-than on i_ that ticks g_counter on every call.
+  bool operator()(const ThrowingAssignment& a, const ThrowingAssignment& b) const {
+    g_counter.tick(); // each comparison is a potential throw point
+    return a.i_ < b.i_;
+  }
+};
+
+struct ErasurePredicate { // Selects the elements whose key lies in [3, 5].
+  bool operator()(const auto& x) const { return (3 <= x.first && x.first <= 5); }
+};
+
+int main(int, char**) { // Sweeps the positions of up to two throws through std::erase_if for three map flavours.
+  const std::pair<int, int> expected[] = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}}; // pre-erase
+  {
+    using M = std::flat_map<ThrowingAssignment, int, ThrowingComparator>; // throwing keys
+    for (int first_throw = 1; first_throw < 99; ++first_throw) {
+      for (int second_throw = 1; second_throw < 99; ++second_throw) {
+        g_counter = {0, 0, 0}; // disarmed while the map is built
+        M m       = M({1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 4, 5, 6, 7, 8});
+        try {
+          g_counter = {first_throw, second_throw, 0}; // throw on tick #first_throw, then second_throw ticks later
+          auto n    = std::erase_if(m, ErasurePredicate());
+          assert(n == 3);
+          // If it didn't throw at all, we're done.
+          g_counter = {0, 0, 0};
+          assert((m == M{{1, 1}, {2, 2}, {6, 6}, {7, 7}, {8, 8}}));
+          first_throw = 99; // "done": also ends the outer loop
+          break;
+        } catch (int ex) {
+          assert(ex == 42);
+          check_invariant(m);
+          LIBCPP_ASSERT(m.empty() || std::equal(m.begin(), m.end(), expected, expected + 8)); // emptied or unchanged
+          if (g_counter.throws == 1) {
+            // We reached the first throw but not the second throw.
+            break;
+          }
+        }
+      }
+    }
+  }
+  {
+    using M = std::flat_map<int, ThrowingAssignment, ThrowingComparator>; // throwing mapped values
+    for (int first_throw = 1; first_throw < 99; ++first_throw) {
+      for (int second_throw = 1; second_throw < 99; ++second_throw) {
+        g_counter = {0, 0, 0}; // disarmed while the map is built
+        M m       = M({1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 4, 5, 6, 7, 8});
+        try {
+          g_counter = {first_throw, second_throw, 0}; // throw on tick #first_throw, then second_throw ticks later
+          auto n    = std::erase_if(m, ErasurePredicate());
+          assert(n == 3);
+          // If it didn't throw at all, we're done.
+          g_counter = {0, 0, 0};
+          assert((m == M{{1, 1}, {2, 2}, {6, 6}, {7, 7}, {8, 8}}));
+          first_throw = 99; // "done": also ends the outer loop
+          break;
+        } catch (int ex) {
+          assert(ex == 42);
+          check_invariant(m);
+          LIBCPP_ASSERT(m.empty() || std::equal(m.begin(), m.end(), expected, expected + 8)); // emptied or unchanged
+          if (g_counter.throws == 1) {
+            // We reached the first throw but not the second throw.
+            break;
+          }
+        }
+      }
+    }
+  }
+  {
+    using M = // throwing keys stored in deques
+        std::flat_map<ThrowingAssignment, int, ThrowingComparator, std::deque<ThrowingAssignment>, std::deque<int>>;
+    for (int first_throw = 1; first_throw < 99; ++first_throw) {
+      for (int second_throw = 1; second_throw < 99; ++second_throw) {
+        g_counter                                = {0, 0, 0}; // disarmed while the map is built
+        std::deque<ThrowingAssignment> container = {5, 6, 7, 8};
+        container.insert(container.begin(), {1, 2, 3, 4}); // NOTE(review): presumably to vary the deque layout
+        M m = M(std::move(container), {1, 2, 3, 4, 5, 6, 7, 8});
+        try {
+          g_counter = {first_throw, second_throw, 0}; // throw on tick #first_throw, then second_throw ticks later
+          auto n    = std::erase_if(m, ErasurePredicate());
+          assert(n == 3);
+          // If it didn't throw at all, we're done.
+          g_counter = {0, 0, 0};
+          assert((m == M{{1, 1}, {2, 2}, {6, 6}, {7, 7}, {8, 8}}));
+          first_throw = 99; // "done": also ends the outer loop
+          break;
+        } catch (int ex) {
+          assert(ex == 42);
+          check_invariant(m);
+          LIBCPP_ASSERT(m.empty() || std::equal(m.begin(), m.end(), expected, expected + 8)); // emptied or unchanged
+          if (g_counter.throws == 1) {
+            // We reached the first throw but not the second throw.
+            break;
+          }
+        }
+      }
+    }
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator.pass.cpp
new file mode 100644
index 000000000000000..b63ce6b19ee165b
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator.pass.cpp
@@ -0,0 +1,96 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+//       iterator begin()   noexcept;
+// const_iterator begin()   const noexcept;
+//       iterator end()     noexcept;
+// const_iterator end()     const noexcept;
+//
+// const_iterator cbegin()  const noexcept;
+// const_iterator cend()    const noexcept;
+
+#include <cassert>
+#include <cstddef>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <string>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks return types, noexcept-ness and traversal of begin()/end()/cbegin()/cend().
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  M m         = {{1, 'a'}, {2, 'b'}, {3, 'c'}, {4, 'd'}};
+  const M& cm = m; // const view of the same map, to reach the const overloads
+  ASSERT_SAME_TYPE(decltype(m.begin()), typename M::iterator);
+  ASSERT_SAME_TYPE(decltype(m.cbegin()), typename M::const_iterator);
+  ASSERT_SAME_TYPE(decltype(cm.begin()), typename M::const_iterator);
+  ASSERT_SAME_TYPE(decltype(m.end()), typename M::iterator);
+  ASSERT_SAME_TYPE(decltype(m.cend()), typename M::const_iterator);
+  ASSERT_SAME_TYPE(decltype(cm.end()), typename M::const_iterator);
+  static_assert(noexcept(m.begin()));
+  static_assert(noexcept(cm.begin()));
+  static_assert(noexcept(m.cbegin()));
+  static_assert(noexcept(m.end()));
+  static_assert(noexcept(cm.end()));
+  static_assert(noexcept(m.cend()));
+  assert(m.size() == 4);
+  assert(std::distance(m.begin(), m.end()) == 4);
+  assert(std::distance(cm.begin(), cm.end()) == 4);
+  assert(std::distance(m.cbegin(), m.cend()) == 4);
+  typename M::iterator i;                   // default-construct
+  i                            = m.begin(); // move-assignment
+  typename M::const_iterator k = i;         // converting constructor
+  assert(i == k);                           // comparison
+  for (int j = 1; j <= 4; ++j, ++i) {       // pre-increment
+    assert(i->first == j);                  // operator->
+    assert(i->second == 'a' + j - 1);
+  }
+  assert(i == m.end());
+  for (int j = 4; j >= 1; --j) { // walk back from end() to begin()
+    --i; // pre-decrement
+    assert((*i).first == j); // operator*
+    assert((*i).second == 'a' + j - 1);
+  }
+  assert(i == m.begin());
+}
+
+int main(int, char**) { // Runs test() for several key/value container pairs, then checks value-initialized iterators.
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  {
+    // N3644 testing: value-initialized iterators compare equal, also across iterator/const_iterator
+    using C = std::flat_map<int, char>;
+    C::iterator ii1{}, ii2{};
+    C::iterator ii4 = ii1;
+    C::const_iterator cii{};
+    assert(ii1 == ii2);
+    assert(ii1 == ii4);
+    assert(!(ii1 != ii2));
+
+    assert((ii1 == cii));
+    assert((cii == ii1));
+    assert(!(ii1 != cii));
+    assert(!(cii != ii1));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_comparison.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_comparison.pass.cpp
new file mode 100644
index 000000000000000..1975d0ed86cc8b8
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_comparison.pass.cpp
@@ -0,0 +1,155 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// flat_map iterators should be C++20 random access iterators
+
+#include <compare>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks ==, !=, <, <=, >, >= (and <=> where available) on all four flat_map iterator types.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using KI    = typename KeyContainer::iterator;
+  using I     = M::iterator;
+  using CI    = M::const_iterator;
+  using RI    = M::reverse_iterator;
+  using CRI   = M::const_reverse_iterator;
+
+  static_assert(std::equality_comparable<I>);
+  static_assert(std::equality_comparable<CI>);
+  static_assert(std::equality_comparable<RI>);
+  static_assert(std::equality_comparable<CRI>);
+
+  static_assert(std::totally_ordered<I>);
+  static_assert(std::totally_ordered<CI>);
+  static_assert(std::totally_ordered<RI>);
+  static_assert(std::totally_ordered<CRI>);
+
+  M m = {{1, 'a'}, {2, 'b'}, {3, 'c'}, {4, 'd'}};
+
+  I i1 = m.begin();
+  I i2 = m.begin() + 1; // one past i1, so i1 < i2 in every check below
+
+  assert(i1 == i1);
+  assert(!(i1 != i1));
+  assert(i1 != i2);
+  assert(!(i1 == i2));
+  assert(i1 < i2);
+  assert(!(i1 < i1));
+  assert(i1 <= i1);
+  assert(i1 <= i2);
+  assert(!(i2 <= i1));
+  assert(i2 > i1);
+  assert(!(i2 > i2));
+  assert(i2 >= i1);
+  assert(i2 >= i2);
+  assert(!(i1 >= i2));
+
+  CI ci1 = m.cbegin();
+  CI ci2 = m.cbegin() + 1; // same checks for const_iterator
+  assert(ci1 == ci1);
+  assert(!(ci1 != ci1));
+  assert(ci1 != ci2);
+  assert(!(ci1 == ci2));
+  assert(ci1 < ci2);
+  assert(!(ci1 < ci1));
+  assert(ci1 <= ci1);
+  assert(ci1 <= ci2);
+  assert(!(ci2 <= ci1));
+  assert(ci2 > ci1);
+  assert(!(ci2 > ci2));
+  assert(ci2 >= ci1);
+  assert(ci2 >= ci2);
+  assert(!(ci1 >= ci2));
+
+  RI ri1 = m.rbegin();
+  RI ri2 = m.rbegin() + 1; // one step further in reverse order, so ri1 < ri2
+  assert(ri1 == ri1);
+  assert(!(ri1 != ri1));
+  assert(ri1 != ri2);
+  assert(!(ri1 == ri2));
+  assert(ri1 < ri2);
+  assert(!(ri1 < ri1));
+  assert(ri1 <= ri1);
+  assert(ri1 <= ri2);
+  assert(!(ri2 <= ri1));
+  assert(ri2 > ri1);
+  assert(!(ri2 > ri2));
+  assert(ri2 >= ri1);
+  assert(ri2 >= ri2);
+  assert(!(ri1 >= ri2));
+
+  CRI cri1 = m.crbegin();
+  CRI cri2 = m.crbegin() + 1; // same checks for const_reverse_iterator
+  assert(cri1 == cri1);
+  assert(!(cri1 != cri1));
+  assert(cri1 != cri2);
+  assert(!(cri1 == cri2));
+  assert(cri1 < cri2);
+  assert(!(cri1 < cri1));
+  assert(cri1 <= cri1);
+  assert(cri1 <= cri2);
+  assert(!(cri2 <= cri1));
+  assert(cri2 > cri1);
+  assert(!(cri2 > cri2));
+  assert(cri2 >= cri1);
+  assert(cri2 >= cri2);
+  assert(!(cri1 >= cri2));
+
+  if constexpr (std::three_way_comparable<KI>) { // <=> is only checked when the key container's iterator has it
+    static_assert(std::three_way_comparable<I>); // ...of course the wrapped iterators still support <=>.
+    static_assert(std::three_way_comparable<CI>);
+    static_assert(std::three_way_comparable<RI>);
+    static_assert(std::three_way_comparable<CRI>);
+    static_assert(std::same_as<decltype(I() <=> I()), std::strong_ordering>);
+    static_assert(std::same_as<decltype(I() <=> CI()), std::strong_ordering>);
+    static_assert(std::same_as<decltype(CI() <=> CI()), std::strong_ordering>);
+    static_assert(std::same_as<decltype(RI() <=> RI()), std::strong_ordering>);
+    static_assert(std::same_as<decltype(RI() <=> CRI()), std::strong_ordering>);
+    static_assert(std::same_as<decltype(CRI() <=> CRI()), std::strong_ordering>);
+
+    assert(i1 <=> i1 == std::strong_ordering::equivalent);
+    assert(i1 <=> i2 == std::strong_ordering::less);
+    assert(i2 <=> i1 == std::strong_ordering::greater);
+
+    assert(ci1 <=> ci1 == std::strong_ordering::equivalent);
+    assert(ci1 <=> ci2 == std::strong_ordering::less);
+    assert(ci2 <=> ci1 == std::strong_ordering::greater);
+
+    assert(ri1 <=> ri1 == std::strong_ordering::equivalent);
+    assert(ri1 <=> ri2 == std::strong_ordering::less);
+    assert(ri2 <=> ri1 == std::strong_ordering::greater);
+
+    assert(cri1 <=> cri1 == std::strong_ordering::equivalent);
+    assert(cri1 <=> cri2 == std::strong_ordering::less);
+    assert(cri2 <=> cri1 == std::strong_ordering::greater);
+  }
+}
+
+int main(int, char**) { // Runs the comparison checks for several key/value container pairs.
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_concept_conformance.compile.pass.cpp
new file mode 100644
index 000000000000000..28814e2e37e3c11
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/iterator_concept_conformance.compile.pass.cpp
@@ -0,0 +1,82 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// iterator, const_iterator, reverse_iterator, const_reverse_iterator
+
+#include <flat_map>
+#include <deque>
+#include <functional>
+#include <iterator>
+#include <string>
+#include <vector>
+#include <type_traits>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Compile-time checks of the iterator concepts modelled by the four flat_map iterator types.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using C     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using I     = C::iterator;
+  using CI    = C::const_iterator;
+  using RI    = C::reverse_iterator;
+  using CRI   = C::const_reverse_iterator;
+  static_assert(std::random_access_iterator<I>);
+  static_assert(std::random_access_iterator<CI>);
+  static_assert(std::random_access_iterator<RI>);
+  static_assert(std::random_access_iterator<CRI>);
+  static_assert(!std::contiguous_iterator<I>);
+  static_assert(!std::contiguous_iterator<CI>);
+  static_assert(!std::contiguous_iterator<RI>);
+  static_assert(!std::contiguous_iterator<CRI>);
+  static_assert(!std::indirectly_writable<I, std::pair<int, char>>);
+  static_assert(!std::indirectly_writable<CI, std::pair<int, char>>);
+  static_assert(!std::indirectly_writable<RI, std::pair<int, char>>);
+  static_assert(!std::indirectly_writable<CRI, std::pair<int, char>>);
+  static_assert(std::sentinel_for<I, I>); // forward and reverse iterators must not act as sentinels for each other
+  static_assert(std::sentinel_for<I, CI>);
+  static_assert(!std::sentinel_for<I, RI>);
+  static_assert(!std::sentinel_for<I, CRI>);
+  static_assert(std::sentinel_for<CI, I>);
+  static_assert(std::sentinel_for<CI, CI>);
+  static_assert(!std::sentinel_for<CI, RI>);
+  static_assert(!std::sentinel_for<CI, CRI>);
+  static_assert(!std::sentinel_for<RI, I>);
+  static_assert(!std::sentinel_for<RI, CI>);
+  static_assert(std::sentinel_for<RI, RI>);
+  static_assert(std::sentinel_for<RI, CRI>);
+  static_assert(!std::sentinel_for<CRI, I>);
+  static_assert(!std::sentinel_for<CRI, CI>);
+  static_assert(std::sentinel_for<CRI, RI>);
+  static_assert(std::sentinel_for<CRI, CRI>);
+  static_assert(std::indirectly_movable_storable<I, std::pair<int, char>*>);
+  static_assert(std::indirectly_movable_storable<CI, std::pair<int, char>*>);
+  static_assert(std::indirectly_movable_storable<RI, std::pair<int, char>*>);
+  static_assert(std::indirectly_movable_storable<CRI, std::pair<int, char>*>);
+
+#ifdef _LIBCPP_VERSION
+  static_assert(std::is_same_v<typename std::iterator_traits<I>::iterator_category, std::random_access_iterator_tag>);
+  static_assert(std::is_same_v<typename std::iterator_traits<CI>::iterator_category, std::random_access_iterator_tag>);
+  static_assert(std::is_same_v<typename std::iterator_traits<RI>::iterator_category, std::random_access_iterator_tag>);
+  static_assert(std::is_same_v<typename std::iterator_traits<CRI>::iterator_category, std::random_access_iterator_tag>);
+#endif // _LIBCPP_VERSION
+}
+
+void test() { // Never called: instantiating the template for each container pair is what triggers the static_asserts.
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/range_concept_conformance.compile.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/range_concept_conformance.compile.pass.cpp
new file mode 100644
index 000000000000000..abbad310f49caf5
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/range_concept_conformance.compile.pass.cpp
@@ -0,0 +1,55 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <ranges>
+#include <string>
+#include <vector>
+#include "MinSequenceContainer.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Compile-time only: pins which std::ranges concepts flat_map models for these containers.
+  {
+    using Key   = typename KeyContainer::value_type;
+    using Value = typename ValueContainer::value_type;
+    using C     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+    // Non-const map: random-access, common, sized and viewable; not contiguous, not a view, not borrowed.
+    static_assert(std::same_as<std::ranges::iterator_t<C>, typename C::iterator>);
+    static_assert(std::ranges::random_access_range<C>);
+    static_assert(!std::ranges::contiguous_range<C>);
+    static_assert(std::ranges::common_range<C>);
+    static_assert(std::ranges::input_range<C>);
+    static_assert(!std::ranges::view<C>);
+    static_assert(std::ranges::sized_range<C>);
+    static_assert(!std::ranges::borrowed_range<C>);
+    static_assert(std::ranges::viewable_range<C>);
+    // Const map: iterates with const_iterator; same concept results except that it is not a viewable_range.
+    static_assert(std::same_as<std::ranges::iterator_t<const C>, typename C::const_iterator>);
+    static_assert(std::ranges::random_access_range<const C>);
+    static_assert(!std::ranges::contiguous_range<const C>);
+    static_assert(std::ranges::common_range<const C>);
+    static_assert(std::ranges::input_range<const C>);
+    static_assert(!std::ranges::view<const C>);
+    static_assert(std::ranges::sized_range<const C>);
+    static_assert(!std::ranges::borrowed_range<const C>);
+    static_assert(!std::ranges::viewable_range<const C>);
+  }
+}
+
+void test() { // Instantiates and calls the templated test<>() for each key/value container pairing below.
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/reverse_iterator.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/reverse_iterator.pass.cpp
new file mode 100644
index 000000000000000..09e18986a7e813c
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.iterators/reverse_iterator.pass.cpp
@@ -0,0 +1,90 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+//       reverse_iterator rbegin() noexcept;
+// const_reverse_iterator rbegin() const noexcept;
+//       reverse_iterator rend()   noexcept;
+// const_reverse_iterator rend()   const noexcept;
+//
+// const_reverse_iterator crbegin() const noexcept;
+// const_reverse_iterator crend()   const noexcept;
+
+#include <cassert>
+#include <cstddef>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <string>
+
+#include <iterator>
+
+#include "test_macros.h"
+#include <iostream>
+
+int main(int, char**) {
+  { // Return types, noexcept-ness and traversal of the reverse iterators of a deque-backed map.
+    using M     = std::flat_map<int, char, std::less<int>, std::deque<int>, std::deque<char>>;
+    M m         = {{1, 'a'}, {2, 'b'}, {3, 'c'}, {4, 'd'}};
+    const M& cm = m;
+    ASSERT_SAME_TYPE(decltype(m.rbegin()), M::reverse_iterator);
+    ASSERT_SAME_TYPE(decltype(m.crbegin()), M::const_reverse_iterator);
+    ASSERT_SAME_TYPE(decltype(cm.rbegin()), M::const_reverse_iterator);
+    ASSERT_SAME_TYPE(decltype(m.rend()), M::reverse_iterator);
+    ASSERT_SAME_TYPE(decltype(m.crend()), M::const_reverse_iterator);
+    ASSERT_SAME_TYPE(decltype(cm.rend()), M::const_reverse_iterator);
+    static_assert(noexcept(m.rbegin()));
+    static_assert(noexcept(cm.rbegin()));
+    static_assert(noexcept(m.crbegin()));
+    static_assert(noexcept(m.rend()));
+    static_assert(noexcept(cm.rend()));
+    static_assert(noexcept(m.crend()));
+    assert(m.size() == 4);
+    assert(std::distance(m.rbegin(), m.rend()) == 4);
+    assert(std::distance(cm.rbegin(), cm.rend()) == 4);
+    assert(std::distance(m.crbegin(), m.crend()) == 4);
+    assert(std::distance(cm.crbegin(), cm.crend()) == 4);
+    M::reverse_iterator i; // default-construct
+    ASSERT_SAME_TYPE(decltype(i->first), const int&);
+    ASSERT_SAME_TYPE(decltype(i->second), char&);
+    i                           = m.rbegin(); // move-assignment
+    M::const_reverse_iterator k = i;          // converting constructor
+    assert(i == k);                           // comparison
+    for (int j = 4; j >= 1; --j, ++i) {       // pre-increment
+      assert(i->first == j);                  // operator->
+      assert(i->second == 'a' + j - 1);
+    }
+    assert(i == m.rend());
+    for (int j = 1; j <= 4; ++j) { // walk back from rend() to rbegin(); keys are seen in ascending order
+      --i; // pre-decrement
+      assert((*i).first == j);
+      assert((*i).second == 'a' + j - 1);
+    }
+    assert(i == m.rbegin());
+  }
+  {
+    // N3644 testing: value-initialized iterators compare equal, also across const and non-const types
+    using C = std::flat_map<int, char>;
+    C::reverse_iterator ii1{}, ii2{};
+    C::reverse_iterator ii4 = ii1;
+    C::const_reverse_iterator cii{};
+    assert(ii1 == ii2);
+    assert(ii1 == ii4);
+    assert(!(ii1 != ii2));
+
+    assert((ii1 == cii));
+    assert((cii == ii1));
+    assert(!(ii1 != cii));
+    assert(!(cii != ii1));
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/clear.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/clear.pass.cpp
new file mode 100644
index 000000000000000..30271eb55660bf3
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/clear.pass.cpp
@@ -0,0 +1,64 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_map
+
+// void clear() noexcept;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// clear() must be noexcept, also with ThrowOnMoveContainer storage (checked only when exceptions are enabled).
+
+template <class T>
+concept NoExceptClear = requires(T t) {
+  { t.clear() } noexcept; // compound requirement: holds only if t.clear() is a non-throwing expression
+};
+
+static_assert(NoExceptClear<std::flat_map<int, int>>);
+#ifndef TEST_HAS_NO_EXCEPTIONS
+static_assert(
+    NoExceptClear<std::flat_map<int, int, std::less<int>, ThrowOnMoveContainer<int>, ThrowOnMoveContainer<int>>>);
+#endif
+
+template <class KeyContainer, class ValueContainer>
+void test() { // clear() returns void, is noexcept, and leaves the map with size 0
+  using K       = typename KeyContainer::value_type;
+  using V       = typename ValueContainer::value_type;
+  using FlatMap = std::flat_map<K, V, std::less<K>, KeyContainer, ValueContainer>;
+
+  FlatMap fm = {{1, 2}, {2, 1}, {3, 3}, {4, 1}, {5, 0}};
+  assert(fm.size() == 5);
+  ASSERT_SAME_TYPE(decltype(fm.clear()), void); // unevaluated: does not clear the map yet
+  ASSERT_NOEXCEPT(fm.clear());                  // unevaluated as well
+  fm.clear();
+  assert(fm.size() == 0);
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<int>>();
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace.pass.cpp
new file mode 100644
index 000000000000000..06631ac689f75db
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace.pass.cpp
@@ -0,0 +1,103 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template <class... Args>
+//   pair<iterator, bool> emplace(Args&&... args);
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+#include <tuple>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "../../../Emplaceable.h"
+#include "DefaultOnly.h"
+#include "min_allocator.h"
+
+// Constraints: is_constructible_v<pair<key_type, mapped_type>, Args...> is true.
+template <class M, class... Args>
+concept CanEmplace = requires(M m, Args&&... args) { m.emplace(std::forward<Args>(args)...); };
+// The asserts below pin which argument lists emplace() accepts and which it rejects.
+using Map = std::flat_map<Emplaceable, Emplaceable>;
+static_assert(CanEmplace<Map>); // no arguments at all
+static_assert(CanEmplace<Map, Emplaceable, Emplaceable>);
+static_assert(CanEmplace<Map, std::piecewise_construct_t, std::tuple<int, double>, std::tuple<int, double>>);
+static_assert(!CanEmplace<Map, Emplaceable>); // rejected: a single Emplaceable argument
+static_assert(!CanEmplace<Map, int, double>); // rejected: (int, double) without piecewise_construct
+
+template <class KeyContainer, class ValueContainer>
+void test_simple() { // emplace() of a ready-made value_type into an empty map
+  using K       = typename KeyContainer::value_type;
+  using V       = typename ValueContainer::value_type;
+  using FlatMap = std::flat_map<K, V, std::less<K>, KeyContainer, ValueContainer>;
+  using Result  = std::pair<typename FlatMap::iterator, bool>;
+  FlatMap fm;
+  ASSERT_SAME_TYPE(decltype(fm.emplace()), Result);
+  Result res = fm.emplace(typename FlatMap::value_type(2, 3.5));
+  assert(res.second); // the element was inserted
+  assert(res.first == fm.begin());
+  assert(fm.size() == 1);
+  assert(fm.begin()->first == 2);
+  assert(fm.begin()->second == 3.5);
+}
+
+template <class KeyContainer, class ValueContainer>
+void test_emplaceable() { // piecewise emplace(): two fresh keys, then a repeat of an existing key
+  using FlatMap = std::flat_map<int, Emplaceable, std::less<int>, KeyContainer, ValueContainer>;
+  using Result  = std::pair<typename FlatMap::iterator, bool>;
+
+  FlatMap fm;
+  ASSERT_SAME_TYPE(decltype(fm.emplace()), Result);
+  Result res = fm.emplace(std::piecewise_construct, std::forward_as_tuple(2), std::forward_as_tuple());
+  assert(res.second); // key 2 is new
+  assert(res.first == fm.begin());
+  assert(fm.size() == 1);
+  assert(fm.begin()->first == 2);
+  assert(fm.begin()->second == Emplaceable());
+  res = fm.emplace(std::piecewise_construct, std::forward_as_tuple(1), std::forward_as_tuple(2, 3.5));
+  assert(res.second); // key 1 is new and sorts before key 2
+  assert(res.first == fm.begin());
+  assert(fm.size() == 2);
+  assert(fm.begin()->first == 1);
+  assert(fm.begin()->second == Emplaceable(2, 3.5));
+  res = fm.emplace(std::piecewise_construct, std::forward_as_tuple(1), std::forward_as_tuple(2, 3.5));
+  assert(!res.second); // key 1 already exists: nothing is inserted
+  assert(res.first == fm.begin());
+  assert(fm.size() == 2);
+  assert(fm.begin()->first == 1);
+  assert(fm.begin()->second == Emplaceable(2, 3.5));
+}
+
+int main(int, char**) {
+  test_simple<std::vector<int>, std::vector<double>>();
+  test_simple<std::deque<int>, std::vector<double>>();
+  test_simple<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test_simple<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  test_emplaceable<std::vector<int>, std::vector<Emplaceable>>();
+  test_emplaceable<std::deque<int>, std::vector<Emplaceable>>();
+  test_emplaceable<MinSequenceContainer<int>, MinSequenceContainer<Emplaceable>>();
+  test_emplaceable<std::vector<int, min_allocator<int>>, std::vector<Emplaceable, min_allocator<Emplaceable>>>();
+
+  { // Hands the emplace call to test_emplace_exception_guarantee (defined outside this file; presumably ../helpers.h).
+    auto emplace_func = [](auto& m, auto key_arg, auto value_arg) {
+      m.emplace(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg));
+    };
+    test_emplace_exception_guarantee(emplace_func);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace_hint.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace_hint.pass.cpp
new file mode 100644
index 000000000000000..cfee6cac5806cc1
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/emplace_hint.pass.cpp
@@ -0,0 +1,102 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template <class... Args>
+//   iterator emplace_hint(const_iterator position, Args&&... args);
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "../../../Emplaceable.h"
+#include "DefaultOnly.h"
+#include "min_allocator.h"
+#include "../helpers.h"
+
+#if defined(_LIBCPP_VERSION)
+// The spec states the constraint is_constructible_v<pair<key_type, mapped_type>, Args...> only for
+// `emplace(Args&&...)`; nothing is mentioned for emplace_hint, so these checks are limited to libc++.
+template <class M, class... Args>
+concept CanEmplaceHint =
+    requires(M m, typename M::const_iterator i, Args&&... args) { m.emplace_hint(i, std::forward<Args>(args)...); };
+// NOTE(review): std::tuple is named below, but this file does not include <tuple> directly.
+using Map = std::flat_map<Emplaceable, Emplaceable>;
+static_assert(CanEmplaceHint<Map>);
+static_assert(CanEmplaceHint<Map, Emplaceable, Emplaceable>);
+static_assert(CanEmplaceHint<Map, std::piecewise_construct_t, std::tuple<int, double>, std::tuple<int, double>>);
+static_assert(!CanEmplaceHint<Map, Emplaceable>);
+static_assert(!CanEmplaceHint<Map, int, double>);
+#endif // defined(_LIBCPP_VERSION)
+
+template <class KeyContainer, class ValueContainer>
+void test_simple() { // emplace_hint() of a ready-made value_type into an empty map, hinted at end()
+  using K       = typename KeyContainer::value_type;
+  using V       = typename ValueContainer::value_type;
+  using FlatMap = std::flat_map<K, V, std::less<K>, KeyContainer, ValueContainer>;
+  using Iter    = FlatMap::iterator;
+  FlatMap fm;
+  ASSERT_SAME_TYPE(decltype(fm.emplace_hint(fm.cbegin())), Iter);
+  Iter it = fm.emplace_hint(fm.end(), typename FlatMap::value_type(2, 3.5));
+  assert(it == fm.begin()); // the returned iterator designates the new, only element
+  assert(fm.size() == 1);
+  assert(fm.begin()->first == 2);
+  assert(fm.begin()->second == 3.5);
+}
+
+template <class KeyContainer, class ValueContainer>
+void test_emplaceable() { // piecewise emplace_hint(): two fresh keys, then a repeat of an existing key
+  using FlatMap = std::flat_map<int, Emplaceable, std::less<int>, KeyContainer, ValueContainer>;
+  using Iter    = FlatMap::iterator;
+
+  FlatMap fm;
+  ASSERT_SAME_TYPE(decltype(fm.emplace_hint(fm.cbegin())), Iter);
+  Iter it = fm.emplace_hint(fm.end(), std::piecewise_construct, std::forward_as_tuple(2), std::forward_as_tuple());
+  assert(it == fm.begin()); // key 2 is the only element
+  assert(fm.size() == 1);
+  assert(fm.begin()->first == 2);
+  assert(fm.begin()->second == Emplaceable());
+  it = fm.emplace_hint(fm.end(), std::piecewise_construct, std::forward_as_tuple(1), std::forward_as_tuple(2, 3.5));
+  assert(it == fm.begin()); // key 1 lands in front even though the hint was end()
+  assert(fm.size() == 2);
+  assert(fm.begin()->first == 1);
+  assert(fm.begin()->second == Emplaceable(2, 3.5));
+  it = fm.emplace_hint(fm.end(), std::piecewise_construct, std::forward_as_tuple(1), std::forward_as_tuple(2, 3.5));
+  assert(it == fm.begin()); // key 1 already exists: the size stays at 2
+  assert(fm.size() == 2);
+  assert(fm.begin()->first == 1);
+  assert(fm.begin()->second == Emplaceable(2, 3.5));
+}
+
+int main(int, char**) {
+  test_simple<std::vector<int>, std::vector<double>>();
+  test_simple<std::deque<int>, std::vector<double>>();
+  test_simple<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test_simple<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  test_emplaceable<std::vector<int>, std::vector<Emplaceable>>();
+  test_emplaceable<std::deque<int>, std::vector<Emplaceable>>();
+  test_emplaceable<MinSequenceContainer<int>, MinSequenceContainer<Emplaceable>>();
+  test_emplaceable<std::vector<int, min_allocator<int>>, std::vector<Emplaceable, min_allocator<Emplaceable>>>();
+
+  { // Hands the hinted emplace to test_emplace_exception_guarantee (defined outside this file; presumably ../helpers.h).
+    auto emplace_func = [](auto& m, auto key_arg, auto value_arg) {
+      m.emplace_hint(m.begin(), std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg));
+    };
+    test_emplace_exception_guarantee(emplace_func);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter.pass.cpp
new file mode 100644
index 000000000000000..914e8b676a65682
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter.pass.cpp
@@ -0,0 +1,151 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// iterator erase(iterator position);
+// iterator erase(const_iterator position);
+
+#include <compare>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Erases one element at a time, checking the returned iterator and the remaining contents.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using P     = std::pair<Key, Value>;
+  using I     = M::iterator;
+
+  P ar[] = {
+      P(1, 1.5),
+      P(2, 2.5),
+      P(3, 3.5),
+      P(4, 4.5),
+      P(5, 5.5),
+      P(6, 6.5),
+      P(7, 7.5),
+      P(8, 8.5),
+  };
+  M m(ar, ar + sizeof(ar) / sizeof(ar[0]));
+  assert(m.size() == 8);
+  std::same_as<I> decltype(auto) i1 = m.erase(std::next(m.cbegin(), 3)); // const_iterator overload: removes key 4
+  assert(m.size() == 7);
+  assert(i1 == std::next(m.begin(), 3));
+  assert(m.begin()->first == 1);
+  assert(m.begin()->second == 1.5);
+  assert(std::next(m.begin())->first == 2);
+  assert(std::next(m.begin())->second == 2.5);
+  assert(std::next(m.begin(), 2)->first == 3);
+  assert(std::next(m.begin(), 2)->second == 3.5);
+  assert(std::next(m.begin(), 3)->first == 5);
+  assert(std::next(m.begin(), 3)->second == 5.5);
+  assert(std::next(m.begin(), 4)->first == 6);
+  assert(std::next(m.begin(), 4)->second == 6.5);
+  assert(std::next(m.begin(), 5)->first == 7);
+  assert(std::next(m.begin(), 5)->second == 7.5);
+  assert(std::next(m.begin(), 6)->first == 8);
+  assert(std::next(m.begin(), 6)->second == 8.5);
+
+  std::same_as<I> decltype(auto) i2 = m.erase(std::next(m.begin(), 0)); // iterator overload: removes key 1 (front)
+  assert(m.size() == 6);
+  assert(i2 == m.begin());
+  assert(m.begin()->first == 2);
+  assert(m.begin()->second == 2.5);
+  assert(std::next(m.begin())->first == 3);
+  assert(std::next(m.begin())->second == 3.5);
+  assert(std::next(m.begin(), 2)->first == 5);
+  assert(std::next(m.begin(), 2)->second == 5.5);
+  assert(std::next(m.begin(), 3)->first == 6);
+  assert(std::next(m.begin(), 3)->second == 6.5);
+  assert(std::next(m.begin(), 4)->first == 7);
+  assert(std::next(m.begin(), 4)->second == 7.5);
+  assert(std::next(m.begin(), 5)->first == 8);
+  assert(std::next(m.begin(), 5)->second == 8.5);
+
+  std::same_as<I> decltype(auto) i3 = m.erase(std::next(m.cbegin(), 5)); // removes key 8 (back); result is end()
+  assert(m.size() == 5);
+  assert(i3 == m.end());
+  assert(m.begin()->first == 2);
+  assert(m.begin()->second == 2.5);
+  assert(std::next(m.begin())->first == 3);
+  assert(std::next(m.begin())->second == 3.5);
+  assert(std::next(m.begin(), 2)->first == 5);
+  assert(std::next(m.begin(), 2)->second == 5.5);
+  assert(std::next(m.begin(), 3)->first == 6);
+  assert(std::next(m.begin(), 3)->second == 6.5);
+  assert(std::next(m.begin(), 4)->first == 7);
+  assert(std::next(m.begin(), 4)->second == 7.5);
+
+  std::same_as<I> decltype(auto) i4 = m.erase(std::next(m.begin(), 1)); // removes key 3
+  assert(m.size() == 4);
+  assert(i4 == std::next(m.begin()));
+  assert(m.begin()->first == 2);
+  assert(m.begin()->second == 2.5);
+  assert(std::next(m.begin())->first == 5);
+  assert(std::next(m.begin())->second == 5.5);
+  assert(std::next(m.begin(), 2)->first == 6);
+  assert(std::next(m.begin(), 2)->second == 6.5);
+  assert(std::next(m.begin(), 3)->first == 7);
+  assert(std::next(m.begin(), 3)->second == 7.5);
+
+  std::same_as<I> decltype(auto) i5 = m.erase(std::next(m.cbegin(), 2)); // removes key 6
+  assert(m.size() == 3);
+  assert(i5 == std::next(m.begin(), 2));
+  assert(m.begin()->first == 2);
+  assert(m.begin()->second == 2.5);
+  assert(std::next(m.begin())->first == 5);
+  assert(std::next(m.begin())->second == 5.5);
+  assert(std::next(m.begin(), 2)->first == 7);
+  assert(std::next(m.begin(), 2)->second == 7.5);
+
+  std::same_as<I> decltype(auto) i6 = m.erase(std::next(m.begin(), 2)); // removes key 7 (back)
+  assert(m.size() == 2);
+  assert(i6 == std::next(m.begin(), 2));
+  assert(m.begin()->first == 2);
+  assert(m.begin()->second == 2.5);
+  assert(std::next(m.begin())->first == 5);
+  assert(std::next(m.begin())->second == 5.5);
+
+  std::same_as<I> decltype(auto) i7 = m.erase(std::next(m.cbegin(), 0)); // removes key 2 (front)
+  assert(m.size() == 1);
+  assert(i7 == std::next(m.begin(), 0));
+  assert(m.begin()->first == 5);
+  assert(m.begin()->second == 5.5);
+
+  std::same_as<I> decltype(auto) i8 = m.erase(m.begin()); // removes the last element, key 5
+  assert(m.size() == 0);
+  assert(i8 == m.begin());
+  assert(i8 == m.end());
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  { // Hands the erase call to test_erase_exception_guarantee (defined outside this file; presumably ../helpers.h).
+    auto erase_function = [](auto& m, auto) { m.erase(m.begin() + 2); }; // second argument is ignored
+    test_erase_exception_guarantee(erase_function);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter_iter.pass.cpp
new file mode 100644
index 000000000000000..0bc92082940291b
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_iter_iter.pass.cpp
@@ -0,0 +1,109 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// iterator erase(const_iterator first, const_iterator last);
+
+#include <compare>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Erases iterator ranges (empty, prefix, suffix, everything) and checks the result after each.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using P     = std::pair<Key, Value>;
+  using I     = M::iterator;
+
+  P ar[] = {
+      P(1, 1.5),
+      P(2, 2.5),
+      P(3, 3.5),
+      P(4, 4.5),
+      P(5, 5.5),
+      P(6, 6.5),
+      P(7, 7.5),
+      P(8, 8.5),
+  };
+  M m(ar, ar + sizeof(ar) / sizeof(ar[0]));
+  assert(m.size() == 8);
+  std::same_as<I> decltype(auto) i1 = m.erase(m.cbegin(), m.cbegin()); // empty range: nothing is removed
+  assert(m.size() == 8);
+  assert(i1 == m.begin());
+  assert(m.begin()->first == 1);
+  assert(m.begin()->second == 1.5);
+  assert(std::next(m.begin())->first == 2);
+  assert(std::next(m.begin())->second == 2.5);
+  assert(std::next(m.begin(), 2)->first == 3);
+  assert(std::next(m.begin(), 2)->second == 3.5);
+  assert(std::next(m.begin(), 3)->first == 4);
+  assert(std::next(m.begin(), 3)->second == 4.5);
+  assert(std::next(m.begin(), 4)->first == 5);
+  assert(std::next(m.begin(), 4)->second == 5.5);
+  assert(std::next(m.begin(), 5)->first == 6);
+  assert(std::next(m.begin(), 5)->second == 6.5);
+  assert(std::next(m.begin(), 6)->first == 7);
+  assert(std::next(m.begin(), 6)->second == 7.5);
+  assert(std::next(m.begin(), 7)->first == 8);
+  assert(std::next(m.begin(), 7)->second == 8.5);
+
+  std::same_as<I> decltype(auto) i2 = m.erase(m.cbegin(), std::next(m.cbegin(), 2)); // removes keys 1 and 2
+  assert(m.size() == 6);
+  assert(i2 == m.begin());
+  assert(std::next(m.begin(), 0)->first == 3);
+  assert(std::next(m.begin(), 0)->second == 3.5);
+  assert(std::next(m.begin(), 1)->first == 4);
+  assert(std::next(m.begin(), 1)->second == 4.5);
+  assert(std::next(m.begin(), 2)->first == 5);
+  assert(std::next(m.begin(), 2)->second == 5.5);
+  assert(std::next(m.begin(), 3)->first == 6);
+  assert(std::next(m.begin(), 3)->second == 6.5);
+  assert(std::next(m.begin(), 4)->first == 7);
+  assert(std::next(m.begin(), 4)->second == 7.5);
+  assert(std::next(m.begin(), 5)->first == 8);
+  assert(std::next(m.begin(), 5)->second == 8.5);
+
+  std::same_as<I> decltype(auto) i3 = m.erase(std::next(m.cbegin(), 2), std::next(m.cbegin(), 6)); // keys 5 to 8
+  assert(m.size() == 2);
+  assert(i3 == std::next(m.begin(), 2));
+  assert(std::next(m.begin(), 0)->first == 3);
+  assert(std::next(m.begin(), 0)->second == 3.5);
+  assert(std::next(m.begin(), 1)->first == 4);
+  assert(std::next(m.begin(), 1)->second == 4.5);
+
+  std::same_as<I> decltype(auto) i4 = m.erase(m.cbegin(), m.cend()); // full range: the map becomes empty
+  assert(m.size() == 0);
+  assert(i4 == m.begin());
+  assert(i4 == m.end());
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  { // Hands the range erase to test_erase_exception_guarantee (defined outside this file; presumably ../helpers.h).
+    auto erase_function = [](auto& m, auto) { m.erase(m.begin(), m.begin() + 2); }; // second argument is ignored
+    test_erase_exception_guarantee(erase_function);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key.pass.cpp
new file mode 100644
index 000000000000000..ef57b1cb5512d57
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key.pass.cpp
@@ -0,0 +1,91 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// size_type erase(const key_type& k);
+
+#include <compare>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer, class Compare = std::less<>>
+void test() { // erase(key) returns the number of elements removed: 1 for a present key, 0 for an absent one
+  using FlatMap = std::flat_map<int, char, Compare, KeyContainer, ValueContainer>;
+
+  auto build = [](std::initializer_list<int> keys) { // expected-state helper: one emplace(k, k) per listed key
+    FlatMap out;
+    for (int k : keys) {
+      out.emplace(k, k);
+    }
+    return out;
+  };
+  FlatMap fm = build({1, 2, 3, 4, 5, 6, 7, 8});
+  ASSERT_SAME_TYPE(decltype(fm.erase(9)), typename FlatMap::size_type);
+  auto erased = fm.erase(9); // absent key
+  assert(erased == 0);
+  assert(fm == build({1, 2, 3, 4, 5, 6, 7, 8}));
+  erased = fm.erase(4);
+  assert(erased == 1);
+  assert(fm == build({1, 2, 3, 5, 6, 7, 8}));
+  erased = fm.erase(1);
+  assert(erased == 1);
+  assert(fm == build({2, 3, 5, 6, 7, 8}));
+  erased = fm.erase(8);
+  assert(erased == 1);
+  assert(fm == build({2, 3, 5, 6, 7}));
+  erased = fm.erase(3);
+  assert(erased == 1);
+  assert(fm == build({2, 5, 6, 7}));
+  erased = fm.erase(4); // already erased above
+  assert(erased == 0);
+  assert(fm == build({2, 5, 6, 7}));
+  erased = fm.erase(6);
+  assert(erased == 1);
+  assert(fm == build({2, 5, 7}));
+  erased = fm.erase(7);
+  assert(erased == 1);
+  assert(fm == build({2, 5}));
+  erased = fm.erase(2);
+  assert(erased == 1);
+  assert(fm == build({5}));
+  erased = fm.erase(5);
+  assert(erased == 1);
+  assert(fm.empty());
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<char>>();
+  test<std::vector<int>, std::vector<char>, std::greater<>>();
+  test<std::deque<int>, std::vector<char>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  { // Hands the keyed erase to test_erase_exception_guarantee (defined outside this file; presumably ../helpers.h).
+    auto erase_function = [](auto& m, auto key_arg) {
+      using Map = std::decay_t<decltype(m)>;
+      using Key = typename Map::key_type;
+      const Key key{key_arg}; // build a real key_type so the erase(const key_type&) overload is the one called
+      m.erase(key);
+    };
+    test_erase_exception_guarantee(erase_function);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key_transparent.pass.cpp
new file mode 100644
index 000000000000000..3ba30757bf2c7d8
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/erase_key_transparent.pass.cpp
@@ -0,0 +1,144 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template <class K> size_type erase(K&& x);
+
+#include <compare>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+template <class M>
+concept CanErase        = requires(M m, Transparent<int> k) { m.erase(k); }; // m.erase(Transparent<int>) is well-formed
+using TransparentMap    = std::flat_map<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_map<int, double, NonTransparentComparator>;
+static_assert(CanErase<TransparentMap>);
+static_assert(!CanErase<const TransparentMap>); // erase must not be callable on a const map
+static_assert(!CanErase<NonTransparentMap>); // expected to be rejected per the constraint quoted above
+static_assert(!CanErase<const NonTransparentMap>);
+
+template <class Key, class It> // Key wrapper that is comparable with Key and converts to It only as an rvalue.
+struct HeterogeneousKey {
+  explicit HeterogeneousKey(Key key, It it) : key_(key), it_(it) {}
+  operator It() && { return it_; } // &&-qualified: only rvalues convert to the iterator type
+  auto operator<=>(Key key) const { return key_ <=> key; } // heterogeneous comparison against a plain Key
+  friend bool operator<(const HeterogeneousKey&, const HeterogeneousKey&) {
+    assert(false); // two HeterogeneousKeys are never expected to be compared with each other
+    return false;
+  }
+  Key key_; // value used for comparisons
+  It it_; // iterator returned by the rvalue conversion
+};
+
+template <class KeyContainer, class ValueContainer> // Checks erase() with an rvalue, an lvalue and a const-lvalue key.
+void test_simple() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>; // NOTE(review): std::less<Key> is not transparent; confirm the "erase(K&&)" notes below
+
+  M m = {{1, 1}, {2, 2}, {3, 3}, {4, 4}};
+  ASSERT_SAME_TYPE(decltype(m.erase(9)), typename M::size_type); // unevaluated: only the return type is checked
+  auto n = m.erase(3); // erase(K&&) [with K=int]
+  assert(n == 1);
+  assert((m == M{{1, 1}, {2, 2}, {4, 4}}));
+  typename M::key_type lvalue = 2;
+  n                           = m.erase(lvalue); // erase(K&&) [with K=int&]
+  assert(n == 1);
+  assert((m == M{{1, 1}, {4, 4}}));
+  const typename M::key_type const_lvalue = 1;
+  n                                       = m.erase(const_lvalue); // erase(const key_type&)
+  assert(n == 1);
+  assert((m == M{{4, 4}}));
+}
+
+template <class KeyContainer, class ValueContainer> // Erases std::string keys through Transparent<std::string>.
+void test_transparent_comparator() {
+  using M = std::flat_map<std::string, int, TransparentComparator, KeyContainer, ValueContainer>;
+  M m     = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}};
+  ASSERT_SAME_TYPE(decltype(m.erase(Transparent<std::string>{"abc"})), typename M::size_type);
+
+  auto n = m.erase(Transparent<std::string>{"epsilon"}); // key present: exactly one element removed
+  assert(n == 1);
+
+  M expected = {{"alpha", 1}, {"beta", 2}, {"eta", 4}, {"gamma", 5}};
+  assert(m == expected);
+
+  auto n2 = m.erase(Transparent<std::string>{"aaa"}); // key absent: nothing removed, map unchanged
+  assert(n2 == 0);
+  assert(m == expected);
+}
+
+int main(int, char**) { // Runs both test templates per container combination, then the special cases below.
+  test_simple<std::vector<int>, std::vector<double>>();
+  test_simple<std::deque<int>, std::vector<double>>();
+  test_simple<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test_simple<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  test_transparent_comparator<std::vector<std::string>, std::vector<int>>();
+  test_transparent_comparator<std::deque<std::string>, std::vector<int>>();
+  test_transparent_comparator<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test_transparent_comparator<std::vector<std::string, min_allocator<std::string>>,
+                              std::vector<int, min_allocator<int>>>();
+
+  {
+    // P2077's HeterogeneousKey example
+    using M                           = std::flat_map<int, int, std::less<>>;
+    M m                               = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}};
+    auto h1                           = HeterogeneousKey<int, M::iterator>(8, m.begin()); // key 8, iterator to the first element
+    std::same_as<M::size_type> auto n = m.erase(h1); // lvalue is not convertible to It; erase(K&&) is the best match
+    assert(n == 1);
+    assert((m == M{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}})); // the element with key 8 is gone
+    std::same_as<M::iterator> auto it = m.erase(std::move(h1)); // rvalue is convertible to It; erase(K&&) drops out
+    assert(it == m.begin());
+    assert((m == M{{2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}})); // the first element (key 1) is gone
+  }
+  { // same scenario as above, but the key converts to const_iterator
+    using M                           = std::flat_map<int, int, std::less<>>;
+    M m                               = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}, {8, 8}};
+    auto h1                           = HeterogeneousKey<int, M::const_iterator>(8, m.begin());
+    std::same_as<M::size_type> auto n = m.erase(h1); // lvalue is not convertible to It; erase(K&&) is the best match
+    assert(n == 1);
+    assert((m == M{{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}}));
+    std::same_as<M::iterator> auto it = m.erase(std::move(h1)); // rvalue is convertible to It; erase(K&&) drops out
+    assert(it == m.begin());
+    assert((m == M{{2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}, {7, 7}}));
+  }
+  { // the flag bound to the comparator must flip only once the heterogeneous erase runs
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used);
+    std::flat_map<int, int, TransparentComparator> m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c);
+    assert(!transparent_used); // construction from sorted_unique input must not set the flag
+    auto n = m.erase(Transparent<int>{3});
+    assert(n == 1);
+    assert(transparent_used);
+  }
+  {
+    auto erase_transparent = [](auto& m, auto key_arg) { // erases key_arg through a Transparent<key_type> wrapper
+      using Map = std::decay_t<decltype(m)>;
+      using Key = typename Map::key_type;
+      m.erase(Transparent<Key>{key_arg});
+    };
+    test_erase_exception_guarantee(erase_transparent); // NOTE(review): helper presumably from ../helpers.h - confirm
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/extract.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/extract.pass.cpp
new file mode 100644
index 000000000000000..d8e4ce94efb9e98
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/extract.pass.cpp
@@ -0,0 +1,91 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// containers extract() &&;
+
+#include <algorithm>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class T>
+concept CanExtract = requires(T&& t) { std::forward<T>(t).extract(); }; // extract() callable with T's value category
+
+static_assert(CanExtract<std::flat_map<int, int>&&>); // only a non-const rvalue may call extract()
+static_assert(!CanExtract<std::flat_map<int, int>&>);
+static_assert(!CanExtract<std::flat_map<int, int> const&>);
+static_assert(!CanExtract<std::flat_map<int, int> const&&>);
+
+template <class KeyContainer, class ValueContainer> // Extracts the containers from a 3-element map and checks both sides.
+void test() {
+  using M = std::flat_map<int, int, std::less<int>, KeyContainer, ValueContainer>;
+  M m     = M({1, 2, 3}, {4, 5, 6}); // keys {1, 2, 3}, values {4, 5, 6}
+
+  std::same_as<typename M::containers> auto containers = std::move(m).extract();
+
+  auto expected_keys   = {1, 2, 3};
+  auto expected_values = {4, 5, 6};
+  assert(std::ranges::equal(containers.keys, expected_keys));
+  assert(std::ranges::equal(containers.values, expected_values));
+  check_invariant(m); // the moved-from map must still pass the invariant check
+  LIBCPP_ASSERT(m.empty()); // NOTE(review): LIBCPP_ASSERT presumably only checks under libc++ - confirm
+  LIBCPP_ASSERT(m.keys().size() == 0);
+  LIBCPP_ASSERT(m.values().size() == 0);
+}
+
+int main(int, char**) { // Runs test() per container combination, then two move-related special cases.
+  test<std::vector<int>, std::vector<int>>();
+  test<std::deque<int>, std::vector<int>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<int>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+  {
+    // the extracted-from object maintains its invariant even if one underlying container does not clear after move
+    using M = std::flat_map<int, int, std::less<>, std::vector<int>, CopyOnlyVector<int>>;
+    M m     = M({1, 2, 3}, {1, 2, 3});
+    std::same_as<M::containers> auto containers = std::move(m).extract();
+    assert(containers.keys.size() == 3);
+    assert(containers.values.size() == 3);
+    check_invariant(m);
+    LIBCPP_ASSERT(m.empty());
+    LIBCPP_ASSERT(m.keys().size() == 0);
+    LIBCPP_ASSERT(m.values().size() == 0);
+  }
+
+  {
+#ifndef TEST_HAS_NO_EXCEPTIONS
+    using KeyContainer   = std::vector<int>;
+    using ValueContainer = ThrowOnMoveContainer<int>;
+    using M              = std::flat_map<int, int, std::ranges::less, KeyContainer, ValueContainer>;
+
+    M m;
+    m.emplace(1, 1);
+    m.emplace(2, 2);
+    try {
+      auto c = std::move(m).extract();
+      assert(false); // extract() is expected to throw before reaching this line
+    } catch (int) {
+      check_invariant(m);
+      // libc++-specific expectation: when extract() throws, the flat_map is left
+      // empty (checked below), so it still satisfies its invariants.
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+#endif
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_cv.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_cv.pass.cpp
new file mode 100644
index 000000000000000..7e667c4e4877bff
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_cv.pass.cpp
@@ -0,0 +1,83 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// pair<iterator, bool> insert(const value_type& v);
+
+#include <flat_map>
+#include <deque>
+#include <cassert>
+#include <functional>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "../helpers.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer> // Checks insert(const value_type&): result pair, position and size.
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using R     = std::pair<typename M::iterator, bool>;
+  using VT    = typename M::value_type;
+  M m;
+
+  const VT v1(2, 2.5); // first element inserted into the empty map
+  std::same_as<R> decltype(auto) r = m.insert(v1);
+  assert(r.second);
+  assert(r.first == m.begin());
+  assert(m.size() == 1);
+  assert(r.first->first == 2);
+  assert(r.first->second == 2.5);
+
+  const VT v2(1, 1.5); // smaller key: must end up at the front
+  r = m.insert(v2);
+  assert(r.second);
+  assert(r.first == m.begin());
+  assert(m.size() == 2);
+  assert(r.first->first == 1);
+  assert(r.first->second == 1.5);
+
+  const VT v3(3, 3.5); // largest key: must end up at the back
+  r = m.insert(v3);
+  assert(r.second);
+  assert(r.first == std::ranges::prev(m.end()));
+  assert(m.size() == 3);
+  assert(r.first->first == 3);
+  assert(r.first->second == 3.5);
+
+  const VT v4(3, 4.5); // duplicate key: nothing inserted, existing value 3.5 is kept
+  r = m.insert(v4);
+  assert(!r.second);
+  assert(r.first == std::ranges::prev(m.end()));
+  assert(m.size() == 3);
+  assert(r.first->first == 3);
+  assert(r.first->second == 3.5);
+}
+
+int main(int, char**) { // Runs test() per container combination, then the exception-guarantee helper.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto insert_func = [](auto& m, auto key_arg, auto value_arg) { // inserts (key_arg, value_arg) as a const value_type
+      using FlatMap    = std::decay_t<decltype(m)>;
+      using value_type = typename FlatMap::value_type;
+      const value_type p(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg));
+      m.insert(p); // p is a const lvalue, so this is the const value_type& overload
+    };
+    test_emplace_exception_guarantee(insert_func); // NOTE(review): helper presumably from ../helpers.h - confirm
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_initializer_list.pass.cpp
new file mode 100644
index 000000000000000..32be3ab8a95b3d2
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_initializer_list.pass.cpp
@@ -0,0 +1,68 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// void insert(initializer_list<value_type> il);
+
+#include <flat_map>
+#include <cassert>
+#include <functional>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer> // Checks insert(initializer_list) with duplicate keys.
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using V     = std::pair<const int, double>;
+
+  M m = {{1, 1}, {1, 1.5}, {1, 2}, {3, 1}, {3, 1.5}, {3, 2}}; // only two distinct keys: 1 and 3
+  m.insert({
+      {4, 1},
+      {4, 1.5},
+      {4, 2},
+      {1, 1},
+      {1, 1.5},
+      {1, 2},
+      {2, 1},
+      {2, 1.5},
+      {2, 2},
+  });
+  assert(m.size() == 4); // distinct keys 1, 2, 3 and 4
+  assert(std::distance(m.begin(), m.end()) == 4);
+  assert(*m.begin() == V(1, 1)); // each key is expected to hold the first value listed for it
+  assert(*std::next(m.begin()) == V(2, 1));
+  assert(*std::next(m.begin(), 2) == V(3, 1));
+  assert(*std::next(m.begin(), 3) == V(4, 1));
+}
+
+int main(int, char**) { // Runs test() per container combination, then the exception-guarantee helper.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto insert_func = [](auto& m, const auto& newValues) { // inserts only newValues[0], via an initializer_list
+      using FlatMap                        = std::decay_t<decltype(m)>;
+      using value_type                     = typename FlatMap::value_type;
+      std::initializer_list<value_type> il = {{newValues[0].first, newValues[0].second}};
+      m.insert(il);
+    };
+    test_insert_range_exception_guarantee(insert_func); // NOTE(review): helper presumably from ../helpers.h - confirm
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_cv.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_cv.pass.cpp
new file mode 100644
index 000000000000000..4bbe0628317dcba
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_cv.pass.cpp
@@ -0,0 +1,79 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// iterator insert(const_iterator position, const value_type& v);
+
+#include <flat_map>
+#include <cassert>
+#include <functional>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "../helpers.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer> // Checks insert(hint, const value_type&), always hinting m.end().
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using R     = typename M::iterator;
+  using VT    = typename M::value_type;
+
+  M m;
+  const VT v1(2, 2.5); // first element inserted into the empty map
+  std::same_as<R> decltype(auto) r = m.insert(m.end(), v1);
+  assert(r == m.begin());
+  assert(m.size() == 1);
+  assert(r->first == 2);
+  assert(r->second == 2.5);
+
+  const VT v2(1, 1.5); // smaller key: lands at the front even though the hint is end()
+  r = m.insert(m.end(), v2);
+  assert(r == m.begin());
+  assert(m.size() == 2);
+  assert(r->first == 1);
+  assert(r->second == 1.5);
+
+  const VT v3(3, 3.5); // largest key: lands at the back
+  r = m.insert(m.end(), v3);
+  assert(r == std::ranges::prev(m.end()));
+  assert(m.size() == 3);
+  assert(r->first == 3);
+  assert(r->second == 3.5);
+
+  const VT v4(3, 4.5); // duplicate key: size stays 3 and the existing value 3.5 is kept
+  r = m.insert(m.end(), v4);
+  assert(r == std::ranges::prev(m.end()));
+  assert(m.size() == 3);
+  assert(r->first == 3);
+  assert(r->second == 3.5);
+}
+
+int main(int, char**) { // Runs test() per container combination, then the exception-guarantee helper.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto insert_func = [](auto& m, auto key_arg, auto value_arg) { // hinted insert of a const value_type
+      using FlatMap    = std::decay_t<decltype(m)>;
+      using value_type = typename FlatMap::value_type;
+      const value_type p(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg));
+      m.insert(m.begin(), p); // hint is always begin()
+    };
+    test_emplace_exception_guarantee(insert_func); // NOTE(review): helper presumably from ../helpers.h - confirm
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_iter.pass.cpp
new file mode 100644
index 000000000000000..8455b19475fe43e
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_iter.pass.cpp
@@ -0,0 +1,89 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template <class InputIterator>
+//   void insert(InputIterator first, InputIterator last);
+
+#include <flat_map>
+#include <cassert>
+#include <functional>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "test_iterators.h"
+#include "min_allocator.h"
+
+// test constraint InputIterator
+template <class M, class... Args>
+concept CanInsert = requires(M m, Args&&... args) { m.insert(std::forward<Args>(args)...); }; // m.insert(args...) is well-formed
+
+using Map  = std::flat_map<int, int>;
+using Pair = std::pair<int, int>;
+
+static_assert(CanInsert<Map, Pair*, Pair*>);
+static_assert(CanInsert<Map, cpp17_input_iterator<Pair*>, cpp17_input_iterator<Pair*>>);
+static_assert(!CanInsert<Map, int, int>); // int is not an iterator
+static_assert(!CanInsert<Map, cpp20_input_iterator<Pair*>, cpp20_input_iterator<Pair*>>); // C++20-only input iterators must be rejected
+
+template <class KeyContainer, class ValueContainer> // Checks insert(first, last) with unsorted input and duplicate keys.
+void test() {
+  using P = std::pair<int, double>;
+  using M = std::flat_map<int, double, std::less<int>, KeyContainer, ValueContainer>;
+
+  P ar1[] = { // keys 2, 1, 3 - three values each, not in sorted order
+      P(2, 1),
+      P(2, 1.5),
+      P(2, 2),
+      P(1, 1),
+      P(1, 1.5),
+      P(1, 2),
+      P(3, 1),
+      P(3, 1.5),
+      P(3, 2),
+  };
+  P ar2[] = { // keys 4, 1, 0 - key 1 is already present after the first insert
+      P(4, 1),
+      P(4, 1.5),
+      P(4, 2),
+      P(1, 1),
+      P(1, 1.5),
+      P(1, 2),
+      P(0, 1),
+      P(0, 1.5),
+      P(0, 2),
+  };
+
+  M m;
+  m.insert(cpp17_input_iterator<P*>(ar1), cpp17_input_iterator<P*>(ar1 + sizeof(ar1) / sizeof(ar1[0])));
+  assert(m.size() == 3);
+  M expected{{1, 1}, {2, 1}, {3, 1}}; // one entry per key, holding the first value listed for that key
+  assert(m == expected);
+
+  m.insert(cpp17_input_iterator<P*>(ar2), cpp17_input_iterator<P*>(ar2 + sizeof(ar2) / sizeof(ar2[0])));
+  assert(m.size() == 5); // only keys 4 and 0 are new
+  M expected2{{0, 1}, {1, 1}, {2, 1}, {3, 1}, {4, 1}};
+  assert(m == expected2);
+}
+int main(int, char**) { // Runs test() per container combination, then the exception-guarantee helper.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto insert_func = [](auto& m, const auto& newValues) { m.insert(newValues.begin(), newValues.end()); }; // iterator-pair insert of the whole range
+    test_insert_range_exception_guarantee(insert_func); // NOTE(review): helper presumably from ../helpers.h - confirm
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_rv.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_rv.pass.cpp
new file mode 100644
index 000000000000000..034941b55eb80b1
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_iter_rv.pass.cpp
@@ -0,0 +1,88 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+//     iterator insert(const_iterator position, value_type&&);
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "MoveOnly.h"
+#include "min_allocator.h"
+#include "../helpers.h"
+#include "test_macros.h"
+
+template <class Container, class Pair> // Checks insert(hint, Pair&&) on Container, always hinting m.end().
+void do_insert_iter_rv_test() {
+  using M = Container;
+  using P = Pair;
+  using R = typename M::iterator;
+  M m;
+  std::same_as<R> decltype(auto) r = m.insert(m.end(), P(2, 2)); // first element inserted into the empty map
+  assert(r == m.begin());
+  assert(m.size() == 1);
+  assert(r->first == 2);
+  assert(r->second == 2);
+
+  r = m.insert(m.end(), P(1, 1)); // smaller key: lands at the front even though the hint is end()
+  assert(r == m.begin());
+  assert(m.size() == 2);
+  assert(r->first == 1);
+  assert(r->second == 1);
+
+  r = m.insert(m.end(), P(3, 3)); // largest key: lands at the back
+  assert(r == std::ranges::prev(m.end()));
+  assert(m.size() == 3);
+  assert(r->first == 3);
+  assert(r->second == 3);
+
+  r = m.insert(m.end(), P(3, 4)); // duplicate key: size stays 3 and the existing value 3 is kept
+  assert(r == std::ranges::prev(m.end()));
+  assert(m.size() == 3);
+  assert(r->first == 3);
+  assert(r->second == 3);
+}
+
+template <class KeyContainer, class ValueContainer> // Runs the hinted rvalue-insert checks for both pair flavours.
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using P     = std::pair<Key, Value>;
+  using CP    = std::pair<const Key, Value>;
+
+  do_insert_iter_rv_test<M, P>(); // pair with a non-const key
+  do_insert_iter_rv_test<M, CP>(); // pair with a const key
+}
+
+int main(int, char**) { // Runs test() with double and MoveOnly values per container kind, then the exception helper.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::vector<int>, std::vector<MoveOnly>>();
+  test<std::deque<int>, std::deque<double>>();
+  test<std::deque<int>, std::deque<MoveOnly>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<MoveOnly>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<MoveOnly, min_allocator<MoveOnly>>>();
+
+  {
+    auto insert_func = [](auto& m, auto key_arg, auto value_arg) { // hinted insert of an rvalue value_type
+      using FlatMap    = std::decay_t<decltype(m)>;
+      using value_type = typename FlatMap::value_type;
+      value_type p(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg));
+      m.insert(m.begin(), std::move(p)); // p is passed as an rvalue; hint is always begin()
+    };
+    test_emplace_exception_guarantee(insert_func); // NOTE(review): helper presumably from ../helpers.h - confirm
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign.pass.cpp
new file mode 100644
index 000000000000000..398a7a1a4052e08
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign.pass.cpp
@@ -0,0 +1,326 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "MoveOnly.h"
+#include "min_allocator.h"
+#include "test_macros.h"
+#include "../helpers.h"
+
+// template<class M>
+//   pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj);
+// template<class M>
+//   pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj);
+// template<class M>
+//   iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj);
+// template<class M>
+//   iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj);
+
+// Constraints: is_assignable_v<mapped_type&, M> is true and is_constructible_v<mapped_type, M> is true.
+template <class Map, class K, class M>
+concept CanInsertOrAssign = // map.insert_or_assign(k, m) is well-formed
+    requires(Map map, K&& k, M&& m) { map.insert_or_assign(std::forward<K>(k), std::forward<M>(m)); };
+
+template <class Map, class K, class M>
+concept CanInsertOrAssignIter = requires(Map map, typename Map::const_iterator iter, K&& k, M&& m) { // hinted form
+  map.insert_or_assign(iter, std::forward<K>(k), std::forward<M>(m));
+};
+
+template <class From> // Declares both explicit construction and assignment from From (declarations only).
+struct ConstructAndAssignFrom {
+  explicit ConstructAndAssignFrom(From);
+  ConstructAndAssignFrom& operator=(From);
+};
+
+template <class From> // Declares explicit construction from From, but no assignment from From.
+struct ConstructFrom {
+  explicit ConstructFrom(From);
+};
+
+template <class From> // Declares assignment from From, but no constructor taking From.
+struct AssignFrom {
+  AssignFrom& operator=(From);
+};
+
+struct V {}; // argument type passed as M in the constraint checks below
+
+static_assert(CanInsertOrAssign<std::flat_map<int, ConstructAndAssignFrom<V>>, const int&, V>); // key passed as const int&
+static_assert(!CanInsertOrAssign<std::flat_map<int, ConstructAndAssignFrom<V>>, const int&, int>); // mapped type takes V, not int
+static_assert(!CanInsertOrAssign<std::flat_map<int, ConstructFrom<V>>, const int&, V>); // constructible only
+static_assert(!CanInsertOrAssign<std::flat_map<int, AssignFrom<V>>, const int&, V>); // assignable only
+
+static_assert(CanInsertOrAssign<std::flat_map<int, ConstructAndAssignFrom<V>>, int&&, V>); // key passed as int&&
+static_assert(!CanInsertOrAssign<std::flat_map<int, ConstructAndAssignFrom<V>>, int&&, int>);
+static_assert(!CanInsertOrAssign<std::flat_map<int, ConstructFrom<V>>, int&&, V>);
+static_assert(!CanInsertOrAssign<std::flat_map<int, AssignFrom<V>>, int&&, V>);
+
+static_assert(CanInsertOrAssignIter<std::flat_map<int, ConstructAndAssignFrom<V>>, const int&, V>); // hinted, const int& key
+static_assert(!CanInsertOrAssignIter<std::flat_map<int, ConstructAndAssignFrom<V>>, const int&, int>);
+static_assert(!CanInsertOrAssignIter<std::flat_map<int, ConstructFrom<V>>, const int&, V>);
+static_assert(!CanInsertOrAssignIter<std::flat_map<int, AssignFrom<V>>, const int&, V>);
+
+static_assert(CanInsertOrAssignIter<std::flat_map<int, ConstructAndAssignFrom<V>>, int&&, V>); // hinted, int&& key
+static_assert(!CanInsertOrAssignIter<std::flat_map<int, ConstructAndAssignFrom<V>>, int&&, int>);
+static_assert(!CanInsertOrAssignIter<std::flat_map<int, ConstructFrom<V>>, int&&, V>);
+static_assert(!CanInsertOrAssignIter<std::flat_map<int, AssignFrom<V>>, int&&, V>);
+
+template <class KeyContainer, class ValueContainer> // Checks insert_or_assign with and without a hint, using int keys.
+void test_cv_key() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+  { // pair<iterator, bool> insert_or_assign(const key_type& k, M&& obj);
+    using R = std::pair<typename M::iterator, bool>;
+    M m;
+    for (int i = 0; i < 20; i += 2) // seed the even keys 0, 2, ..., 18
+      m.emplace(i, Moveable(i, (double)i));
+    assert(m.size() == 10);
+
+    for (int i = 0; i < 20; i += 2) { // every key already exists, so each call must assign
+      Moveable mv(i + 1, i + 1);
+      std::same_as<R> decltype(auto) r1 = m.insert_or_assign(i, std::move(mv)); // NOTE(review): `i` is a non-const lvalue and the comparator is transparent; confirm which overload is selected
+      assert(m.size() == 10);
+      assert(!r1.second);                      // was not inserted
+      assert(mv.moved());                      // was moved from
+      assert(r1.first->first == i);            // key
+      assert(r1.first->second.get() == i + 1); // value
+    }
+
+    Moveable mv1(5, 5.0);
+    std::same_as<R> decltype(auto) r2 = m.insert_or_assign(-1, std::move(mv1)); // new key below every existing key
+    assert(m.size() == 11);
+    assert(r2.second);                   // was inserted
+    assert(mv1.moved());                 // was moved from
+    assert(r2.first->first == -1);       // key
+    assert(r2.first->second.get() == 5); // value
+
+    Moveable mv2(9, 9.0);
+    std::same_as<R> decltype(auto) r3 = m.insert_or_assign(3, std::move(mv2)); // new odd key between existing keys
+    assert(m.size() == 12);
+    assert(r3.second);                   // was inserted
+    assert(mv2.moved());                 // was moved from
+    assert(r3.first->first == 3);        // key
+    assert(r3.first->second.get() == 9); // value
+
+    Moveable mv3(-1, 5.0);
+    std::same_as<R> decltype(auto) r4 = m.insert_or_assign(117, std::move(mv3)); // new key above every existing key
+    assert(m.size() == 13);
+    assert(r4.second);                    // was inserted
+    assert(mv3.moved());                  // was moved from
+    assert(r4.first->first == 117);       // key
+    assert(r4.first->second.get() == -1); // value
+  }
+
+  { // iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj);
+    M m;
+    using R = M::iterator;
+    for (int i = 0; i < 20; i += 2) // seed the even keys 0, 2, ..., 18
+      m.emplace(i, Moveable(i, (double)i));
+    assert(m.size() == 10);
+    typename M::const_iterator it = m.find(2);
+
+    Moveable mv1(3, 3.0);
+    std::same_as<R> decltype(auto) r1 = m.insert_or_assign(it, 2, std::move(mv1)); // existing key: assigns, size unchanged
+    assert(m.size() == 10);
+    assert(mv1.moved());           // was moved from
+    assert(r1->first == 2);        // key
+    assert(r1->second.get() == 3); // value
+
+    Moveable mv2(5, 5.0);
+    std::same_as<R> decltype(auto) r2 = m.insert_or_assign(it, 3, std::move(mv2)); // new key: inserts, size grows
+    assert(m.size() == 11);
+    assert(mv2.moved());           // was moved from
+    assert(r2->first == 3);        // key
+    assert(r2->second.get() == 5); // value
+
+    // wrong hint: begin()
+    Moveable mv3(7, 7.0);
+    std::same_as<R> decltype(auto) r3 = m.insert_or_assign(m.begin(), 4, std::move(mv3)); // existing key
+    assert(m.size() == 11);
+    assert(mv3.moved());           // was moved from
+    assert(r3->first == 4);        // key
+    assert(r3->second.get() == 7); // value
+
+    Moveable mv4(9, 9.0);
+    std::same_as<R> decltype(auto) r4 = m.insert_or_assign(m.begin(), 5, std::move(mv4)); // new key
+    assert(m.size() == 12);
+    assert(mv4.moved());           // was moved from
+    assert(r4->first == 5);        // key
+    assert(r4->second.get() == 9); // value
+
+    // wrong hint: end()
+    Moveable mv5(11, 11.0);
+    std::same_as<R> decltype(auto) r5 = m.insert_or_assign(m.end(), 6, std::move(mv5)); // existing key
+    assert(m.size() == 12);
+    assert(mv5.moved());            // was moved from
+    assert(r5->first == 6);         // key
+    assert(r5->second.get() == 11); // value
+
+    Moveable mv6(13, 13.0);
+    std::same_as<R> decltype(auto) r6 = m.insert_or_assign(m.end(), 7, std::move(mv6)); // new key
+    assert(m.size() == 13);
+    assert(mv6.moved());            // was moved from
+    assert(r6->first == 7);         // key
+    assert(r6->second.get() == 13); // value
+
+    // wrong hint: third element
+    Moveable mv7(15, 15.0);
+    std::same_as<R> decltype(auto) r7 = m.insert_or_assign(std::next(m.begin(), 2), 8, std::move(mv7)); // existing key
+    assert(m.size() == 13);
+    assert(mv7.moved());            // was moved from
+    assert(r7->first == 8);         // key
+    assert(r7->second.get() == 15); // value
+
+    Moveable mv8(17, 17.0);
+    std::same_as<R> decltype(auto) r8 = m.insert_or_assign(std::next(m.begin(), 2), 9, std::move(mv8)); // new key
+    assert(m.size() == 14);
+    assert(mv8.moved());            // was moved from
+    assert(r8->first == 9);         // key
+    assert(r8->second.get() == 17); // value
+  }
+}
+
+template <class KeyContainer, class ValueContainer>
+void test_rv_key() { // exercises the key_type&& overloads of insert_or_assign, with and without a hint
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  { // pair<iterator, bool> insert_or_assign(key_type&& k, M&& obj);
+    using R = std::pair<typename M::iterator, bool>;
+    M m;
+    for (int i = 0; i < 20; i += 2) // seed the map with the even keys 0, 2, ..., 18
+      m.emplace(Moveable(i, (double)i), Moveable(i + 1, (double)i + 1));
+    assert(m.size() == 10);
+
+    Moveable mvkey1(2, 2.0); // key already present: the value is assigned, the key argument is not consumed
+    Moveable mv1(4, 4.0);
+    std::same_as<R> decltype(auto) r1 = m.insert_or_assign(std::move(mvkey1), std::move(mv1));
+    assert(m.size() == 10);
+    assert(!r1.second);                  // was not inserted
+    assert(!mvkey1.moved());             // was not moved from
+    assert(mv1.moved());                 // was moved from
+    assert(r1.first->first == mvkey1);   // key
+    assert(r1.first->second.get() == 4); // value
+
+    Moveable mvkey2(3, 3.0); // key absent: both the key and the value are moved into the map
+    Moveable mv2(5, 5.0);
+    std::same_as<R> decltype(auto) r2 = m.insert_or_assign(std::move(mvkey2), std::move(mv2));
+    assert(m.size() == 11);
+    assert(r2.second);                   // was inserted
+    assert(mv2.moved());                 // was moved from
+    assert(mvkey2.moved());              // was moved from
+    assert(r2.first->first.get() == 3);  // key
+    assert(r2.first->second.get() == 5); // value
+  }
+  { // iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj);
+    using R = M::iterator;
+    M m;
+    for (int i = 0; i < 20; i += 2) // seed the map with the even keys 0, 2, ..., 18
+      m.emplace(Moveable(i, (double)i), Moveable(i + 1, (double)i + 1));
+    assert(m.size() == 10);
+    typename M::const_iterator it = std::next(m.cbegin()); // second element, i.e. key 2
+
+    Moveable mvkey1(2, 2.0); // existing key, hint at that element
+    Moveable mv1(4, 4.0);
+    std::same_as<R> decltype(auto) r1 = m.insert_or_assign(it, std::move(mvkey1), std::move(mv1));
+    assert(m.size() == 10);
+    assert(mv1.moved());           // was moved from
+    assert(!mvkey1.moved());       // was not moved from
+    assert(r1->first == mvkey1);   // key
+    assert(r1->second.get() == 4); // value
+
+    Moveable mvkey2(3, 3.0); // new key, same hint
+    Moveable mv2(5, 5.0);
+    std::same_as<R> decltype(auto) r2 = m.insert_or_assign(it, std::move(mvkey2), std::move(mv2));
+    assert(m.size() == 11);
+    assert(mv2.moved());           // was moved from
+    assert(mvkey2.moved());        // was moved from
+    assert(r2->first.get() == 3);  // key
+    assert(r2->second.get() == 5); // value
+
+    // wrong hint: begin()
+    Moveable mvkey3(6, 6.0); // existing key
+    Moveable mv3(8, 8.0);
+    std::same_as<R> decltype(auto) r3 = m.insert_or_assign(m.begin(), std::move(mvkey3), std::move(mv3));
+    assert(m.size() == 11);
+    assert(mv3.moved());           // was moved from
+    assert(!mvkey3.moved());       // was not moved from
+    assert(r3->first == mvkey3);   // key
+    assert(r3->second.get() == 8); // value
+
+    Moveable mvkey4(7, 7.0); // new key
+    Moveable mv4(9, 9.0);
+    std::same_as<R> decltype(auto) r4 = m.insert_or_assign(m.begin(), std::move(mvkey4), std::move(mv4));
+    assert(m.size() == 12);
+    assert(mv4.moved());           // was moved from
+    assert(mvkey4.moved());        // was moved from
+    assert(r4->first.get() == 7);  // key
+    assert(r4->second.get() == 9); // value
+
+    // wrong hint: end()
+    Moveable mvkey5(8, 8.0); // existing key
+    Moveable mv5(10, 10.0);
+    std::same_as<R> decltype(auto) r5 = m.insert_or_assign(m.end(), std::move(mvkey5), std::move(mv5));
+    assert(m.size() == 12);
+    assert(mv5.moved());            // was moved from
+    assert(!mvkey5.moved());        // was not moved from
+    assert(r5->first == mvkey5);    // key
+    assert(r5->second.get() == 10); // value
+
+    Moveable mvkey6(9, 9.0); // new key
+    Moveable mv6(11, 11.0);
+    std::same_as<R> decltype(auto) r6 = m.insert_or_assign(m.end(), std::move(mvkey6), std::move(mv6));
+    assert(m.size() == 13);
+    assert(mv6.moved());            // was moved from
+    assert(mvkey6.moved());         // was moved from
+    assert(r6->first.get() == 9);   // key
+    assert(r6->second.get() == 11); // value
+
+    // wrong hint: third element
+    Moveable mvkey7(10, 10.0); // existing key
+    Moveable mv7(12, 12.0);
+    std::same_as<R> decltype(auto) r7 = m.insert_or_assign(std::next(m.begin(), 2), std::move(mvkey7), std::move(mv7));
+    assert(m.size() == 13);
+    assert(mv7.moved());            // was moved from
+    assert(!mvkey7.moved());        // was not moved from
+    assert(r7->first == mvkey7);    // key
+    assert(r7->second.get() == 12); // value
+
+    Moveable mvkey8(11, 11.0); // new key
+    Moveable mv8(13, 13.0);
+    std::same_as<R> decltype(auto) r8 = m.insert_or_assign(std::next(m.begin(), 2), std::move(mvkey8), std::move(mv8));
+    assert(m.size() == 14);
+    assert(mv8.moved());            // was moved from
+    assert(mvkey8.moved());         // was moved from
+    assert(r8->first.get() == 11);  // key
+    assert(r8->second.get() == 13); // value
+  }
+}
+
+int main(int, char**) { // runs both suites over four key/value container combinations
+  test_cv_key<std::vector<int>, std::vector<Moveable>>(); // int keys
+  test_cv_key<std::deque<int>, std::vector<Moveable>>();
+  test_cv_key<MinSequenceContainer<int>, MinSequenceContainer<Moveable>>();
+  test_cv_key<std::vector<int, min_allocator<int>>, std::vector<Moveable, min_allocator<Moveable>>>();
+
+  test_rv_key<std::vector<Moveable>, std::vector<Moveable>>(); // Moveable keys, so key moves are observable
+  test_rv_key<std::deque<Moveable>, std::vector<Moveable>>();
+  test_rv_key<MinSequenceContainer<Moveable>, MinSequenceContainer<Moveable>>();
+  test_rv_key<std::vector<Moveable, min_allocator<Moveable>>, std::vector<Moveable, min_allocator<Moveable>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign_transparent.pass.cpp
new file mode 100644
index 000000000000000..636c4edfe551de8
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_or_assign_transparent.pass.cpp
@@ -0,0 +1,259 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "MoveOnly.h"
+#include "min_allocator.h"
+#include "test_macros.h"
+#include "../helpers.h"
+
+// template<class K, class M>
+//   pair<iterator, bool> insert_or_assign(K&& k, M&& obj);
+// template<class K, class M>
+//   iterator insert_or_assign(const_iterator hint, K&& k, M&& obj);
+
+// Constraints:
+// The qualified-id Compare::is_transparent is valid and denotes a type.
+// is_constructible_v<key_type, K> is true.
+// is_assignable_v<mapped_type&, M> is true.
+// is_constructible_v<mapped_type, M> is true.
+
+template <class Map, class K, class M> // satisfied iff map.insert_or_assign(k, m) is a valid call
+concept CanInsertOrAssign =
+    requires(Map map, K&& k, M&& m) { map.insert_or_assign(std::forward<K>(k), std::forward<M>(m)); };
+
+template <class Map, class K, class M> // same check for the overload taking a const_iterator hint
+concept CanInsertOrAssignIter = requires(Map map, typename Map::const_iterator iter, K&& k, M&& m) {
+  map.insert_or_assign(iter, std::forward<K>(k), std::forward<M>(m));
+};
+
+template <class From>
+struct ConstructAndAssignFrom { // mapped_type stand-in: explicitly constructible from and assignable from From
+  explicit ConstructAndAssignFrom(From);     // declaration only; used in the static_asserts below
+  ConstructAndAssignFrom& operator=(From);   // declaration only
+};
+
+template <class From>
+struct ConstructFrom { // mapped_type stand-in: explicitly constructible from From, with no operator=(From)
+  explicit ConstructFrom(From); // declaration only
+};
+
+template <class From>
+struct AssignFrom { // mapped_type stand-in: assignable from From, with no constructor taking From
+  AssignFrom& operator=(From); // declaration only
+};
+
+struct V {};
+
+static_assert(CanInsertOrAssign<std::flat_map<int, ConstructAndAssignFrom<V>, TransparentComparator>,
+                                ConvertibleTransparent<int>,
+                                V>); // accepted: the only combination in this group expected to compile
+static_assert(!CanInsertOrAssign<std::flat_map<int, ConstructAndAssignFrom<V>, TransparentComparator>,
+                                 NonConvertibleTransparent<int>,
+                                 V>); // rejected: presumably key_type is not constructible from K (see helpers.h)
+static_assert(!CanInsertOrAssign<std::flat_map<int, ConstructAndAssignFrom<V>, NonTransparentComparator>,
+                                 NonConvertibleTransparent<int>,
+                                 V>); // rejected: comparator is NonTransparentComparator (key is non-convertible too)
+static_assert(!CanInsertOrAssign<std::flat_map<int, ConstructAndAssignFrom<V>, TransparentComparator>,
+                                 ConvertibleTransparent<int>,
+                                 int>); // rejected: mapped_type only accepts V, not int
+static_assert( // rejected: mapped_type is constructible from V but has no operator=(V)
+    !CanInsertOrAssign<std::flat_map<int, ConstructFrom<V>, TransparentComparator>, ConvertibleTransparent<int>, V>);
+static_assert( // rejected: mapped_type is assignable from V but has no constructor taking V
+    !CanInsertOrAssign<std::flat_map<int, AssignFrom<V>, TransparentComparator>, ConvertibleTransparent<int>, V>);
+
+static_assert(CanInsertOrAssignIter<std::flat_map<int, ConstructAndAssignFrom<V>, TransparentComparator>,
+                                    ConvertibleTransparent<int>,
+                                    V>); // accepted: same cases as above, for the hinted overload
+static_assert(!CanInsertOrAssignIter<std::flat_map<int, ConstructAndAssignFrom<V>, TransparentComparator>,
+                                     NonConvertibleTransparent<int>,
+                                     V>); // rejected: presumably key_type is not constructible from K (see helpers.h)
+static_assert(!CanInsertOrAssignIter<std::flat_map<int, ConstructAndAssignFrom<V>, NonTransparentComparator>,
+                                     NonConvertibleTransparent<int>,
+                                     V>); // rejected: comparator is NonTransparentComparator
+static_assert(!CanInsertOrAssignIter<std::flat_map<int, ConstructAndAssignFrom<V>, TransparentComparator>,
+                                     ConvertibleTransparent<int>,
+                                     int>); // rejected: mapped_type only accepts V, not int
+static_assert(!CanInsertOrAssignIter<std::flat_map<int, ConstructFrom<V>, TransparentComparator>,
+                                     ConvertibleTransparent<int>,
+                                     V>); // rejected: mapped_type has no operator=(V)
+static_assert( // rejected: mapped_type has no constructor taking V
+    !CanInsertOrAssignIter<std::flat_map<int, AssignFrom<V>, TransparentComparator>, ConvertibleTransparent<int>, V>);
+
+template <class KeyContainer, class ValueContainer>
+void test() { // exercises the templated K&& overloads, passing every key as ConvertibleTransparent<int>
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+  {
+    // template<class K, class M> pair<iterator, bool> insert_or_assign(K&& k, M&& obj);
+    using R = std::pair<typename M::iterator, bool>;
+    M m;
+    for (int i = 0; i < 20; i += 2) // seed the map with the even keys 0, 2, ..., 18
+      m.emplace(i, Moveable(i, (double)i));
+    assert(m.size() == 10);
+
+    for (int i = 0; i < 20; i += 2) { // every existing key: the value is overwritten, the size is unchanged
+      Moveable mv(i + 1, i + 1);
+      std::same_as<R> decltype(auto) r1 = m.insert_or_assign(ConvertibleTransparent<int>{i}, std::move(mv));
+      assert(m.size() == 10);
+      assert(!r1.second);                      // was not inserted
+      assert(mv.moved());                      // was moved from
+      assert(r1.first->first == i);            // key
+      assert(r1.first->second.get() == i + 1); // value
+    }
+
+    Moveable mv1(5, 5.0); // new key smaller than every existing key
+    std::same_as<R> decltype(auto) r2 = m.insert_or_assign(ConvertibleTransparent<int>{-1}, std::move(mv1));
+    assert(m.size() == 11);
+    assert(r2.second);                   // was inserted
+    assert(mv1.moved());                 // was moved from
+    assert(r2.first->first == -1);       // key
+    assert(r2.first->second.get() == 5); // value
+
+    Moveable mv2(9, 9.0); // new key between existing keys
+    std::same_as<R> decltype(auto) r3 = m.insert_or_assign(ConvertibleTransparent<int>{3}, std::move(mv2));
+    assert(m.size() == 12);
+    assert(r3.second);                   // was inserted
+    assert(mv2.moved());                 // was moved from
+    assert(r3.first->first == 3);        // key
+    assert(r3.first->second.get() == 9); // value
+
+    Moveable mv3(-1, 5.0); // new key larger than every existing key
+    std::same_as<R> decltype(auto) r4 = m.insert_or_assign(ConvertibleTransparent<int>{117}, std::move(mv3));
+    assert(m.size() == 13);
+    assert(r4.second);                    // was inserted
+    assert(mv3.moved());                  // was moved from
+    assert(r4.first->first == 117);       // key
+    assert(r4.first->second.get() == -1); // value
+  }
+  {
+    // template<class K, class M> iterator insert_or_assign(const_iterator hint, K&& k, M&& obj);
+    using R = M::iterator;
+    M m;
+    for (int i = 0; i < 20; i += 2) // seed the map with the even keys 0, 2, ..., 18
+      m.emplace(i, Moveable(i, (double)i));
+    assert(m.size() == 10);
+    typename M::const_iterator it = m.find(2);
+
+    Moveable mv1(3, 3.0); // existing key, hint at that element
+    std::same_as<R> decltype(auto) r1 = m.insert_or_assign(it, ConvertibleTransparent<int>{2}, std::move(mv1));
+    assert(m.size() == 10);
+    assert(mv1.moved());           // was moved from
+    assert(r1->first == 2);        // key
+    assert(r1->second.get() == 3); // value
+
+    Moveable mv2(5, 5.0); // new key, same hint
+    std::same_as<R> decltype(auto) r2 = m.insert_or_assign(it, ConvertibleTransparent<int>{3}, std::move(mv2));
+    assert(m.size() == 11);
+    assert(mv2.moved());           // was moved from
+    assert(r2->first == 3);        // key
+    assert(r2->second.get() == 5); // value
+
+    // wrong hint: begin()
+    Moveable mv3(7, 7.0); // existing key
+    std::same_as<R> decltype(auto) r3 = m.insert_or_assign(m.begin(), ConvertibleTransparent<int>{4}, std::move(mv3));
+    assert(m.size() == 11);
+    assert(mv3.moved());           // was moved from
+    assert(r3->first == 4);        // key
+    assert(r3->second.get() == 7); // value
+
+    Moveable mv4(9, 9.0); // new key
+    std::same_as<R> decltype(auto) r4 = m.insert_or_assign(m.begin(), ConvertibleTransparent<int>{5}, std::move(mv4));
+    assert(m.size() == 12);
+    assert(mv4.moved());           // was moved from
+    assert(r4->first == 5);        // key
+    assert(r4->second.get() == 9); // value
+
+    // wrong hint: end()
+    Moveable mv5(11, 11.0); // existing key
+    std::same_as<R> decltype(auto) r5 = m.insert_or_assign(m.end(), ConvertibleTransparent<int>{6}, std::move(mv5));
+    assert(m.size() == 12);
+    assert(mv5.moved());            // was moved from
+    assert(r5->first == 6);         // key
+    assert(r5->second.get() == 11); // value
+
+    Moveable mv6(13, 13.0); // new key
+    std::same_as<R> decltype(auto) r6 = m.insert_or_assign(m.end(), ConvertibleTransparent<int>{7}, std::move(mv6));
+    assert(m.size() == 13);
+    assert(mv6.moved());            // was moved from
+    assert(r6->first == 7);         // key
+    assert(r6->second.get() == 13); // value
+
+    // wrong hint: third element
+    Moveable mv7(15, 15.0); // existing key
+    std::same_as<R> decltype(auto) r7 =
+        m.insert_or_assign(std::next(m.begin(), 2), ConvertibleTransparent<int>{8}, std::move(mv7));
+    assert(m.size() == 13);
+    assert(mv7.moved());            // was moved from
+    assert(r7->first == 8);         // key
+    assert(r7->second.get() == 15); // value
+
+    Moveable mv8(17, 17.0); // new key
+    std::same_as<R> decltype(auto) r8 =
+        m.insert_or_assign(std::next(m.begin(), 2), ConvertibleTransparent<int>{9}, std::move(mv8));
+    assert(m.size() == 14);
+    assert(mv8.moved());            // was moved from
+    assert(r8->first == 9);         // key
+    assert(r8->second.get() == 17); // value
+  }
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<Moveable>>();
+  test<std::deque<int>, std::vector<Moveable>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<Moveable>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<Moveable, min_allocator<Moveable>>>();
+
+  { // non-hinted overload: the call on an existing key must set transparent_used
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used);
+    std::flat_map<int, int, TransparentComparator> m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c);
+    assert(!transparent_used); // construction from sorted_unique input has not set the flag
+    auto p = m.insert_or_assign(ConvertibleTransparent<int>{3}, 5);
+    assert(!p.second);
+    assert(transparent_used);
+  }
+  { // hinted overload: same check, and the existing key's value is overwritten
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used);
+    std::flat_map<int, int, TransparentComparator> m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c);
+    assert(!transparent_used);
+    auto it = m.insert_or_assign(m.begin(), ConvertibleTransparent<int>{3}, 5);
+    assert(it->second == 5);
+    assert(transparent_used);
+  }
+
+  { // hands the non-hinted overload to test_emplace_exception_guarantee (not defined in this file)
+    auto insert_or_assign = [](auto& m, auto key_arg, auto value_arg) {
+      using M   = std::decay_t<decltype(m)>;
+      using Key = typename M::key_type;
+      m.insert_or_assign(ConvertibleTransparent<Key>{key_arg}, value_arg);
+    };
+    test_emplace_exception_guarantee(insert_or_assign);
+  }
+
+  { // same for the hinted overload, always hinting at begin()
+    auto insert_or_assign_iter = [](auto& m, auto key_arg, auto value_arg) {
+      using M   = std::decay_t<decltype(m)>;
+      using Key = typename M::key_type;
+      m.insert_or_assign(m.begin(), ConvertibleTransparent<Key>{key_arg}, value_arg);
+    };
+    test_emplace_exception_guarantee(insert_or_assign_iter);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range.pass.cpp
new file mode 100644
index 000000000000000..a2e64431a3c255a
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range.pass.cpp
@@ -0,0 +1,109 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<container-compatible-range<value_type> R>
+//   void insert_range(R&& rg);
+
+#include <algorithm>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <ranges>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "MoveOnly.h"
+#include "test_macros.h"
+#include "test_iterators.h"
+#include "min_allocator.h"
+
+// test constraint container-compatible-range
+template <class M, class R> // satisfied iff m.insert_range(r) is a valid call for a forwarded range r
+concept CanInsertRange = requires(M m, R&& r) { m.insert_range(std::forward<R>(r)); };
+
+using Map = std::flat_map<int, double>;
+
+static_assert(CanInsertRange<Map, std::ranges::subrange<std::pair<int, double>*>>);
+static_assert(CanInsertRange<Map, std::ranges::subrange<std::pair<short, double>*>>);
+static_assert(!CanInsertRange<Map, std::ranges::subrange<int*>>);
+static_assert(!CanInsertRange<Map, std::ranges::subrange<double*>>);
+
+template <class KeyContainer, class ValueContainer>
+void test() { // insert_range with a common range, a non-common range, and a comparator coarser than operator==
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+
+  { // common range of forward iterators; keys already in m (1 and 5) keep their old values
+    using P                 = std::pair<int, int>;
+    using M                 = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+    using It                = forward_iterator<const P*>;
+    M m                     = {{10, 1}, {8, 2}, {5, 3}, {2, 4}, {1, 5}};
+    P ar[]                  = {{3, 1}, {1, 2}, {4, 3}, {1, 4}, {5, 5}, {9, 6}};
+    std::ranges::subrange r = {It(ar), It(ar + 6)};
+    static_assert(std::ranges::common_range<decltype(r)>);
+    m.insert_range(r);
+    assert((m == M{{1, 5}, {2, 4}, {3, 1}, {4, 3}, {5, 3}, {8, 2}, {9, 6}, {10, 1}}));
+  }
+  { // non-common range (input iterator + sentinel) inserted into a std::greater-ordered map
+    using P                 = std::pair<int, int>;
+    using M                 = std::flat_map<Key, Value, std::greater<>, KeyContainer, ValueContainer>;
+    using It                = cpp20_input_iterator<const P*>;
+    M m                     = {{8, 1}, {5, 2}, {3, 3}, {2, 4}};
+    P ar[]                  = {{3, 1}, {1, 2}, {4, 3}, {1, 4}, {5, 5}, {9, 6}};
+    std::ranges::subrange r = {It(ar), sentinel_wrapper<It>(It(ar + 6))};
+    static_assert(!std::ranges::common_range<decltype(r)>);
+    m.insert_range(r);
+    assert((m == M{{1, 2}, {2, 4}, {3, 3}, {4, 3}, {5, 2}, {8, 1}, {9, 6}}));
+  }
+  {
+    // The "uniquing" part uses the comparator, not operator==.
+    struct ModTen { // orders ints by their last decimal digit, so e.g. 33 and 43 are equivalent keys
+      bool operator()(int a, int b) const { return (a % 10) < (b % 10); }
+    };
+    using P = std::pair<int, int>;
+    using M = std::flat_map<Key, Value, ModTen, KeyContainer, ValueContainer>;
+    M m     = {{21, 0}, {43, 0}, {15, 0}, {37, 0}};
+    P ar[]  = {{33, 1}, {18, 1}, {55, 1}, {18, 1}, {42, 1}};
+    m.insert_range(ar);
+    assert((m == M{{21, 0}, {42, 1}, {43, 0}, {15, 0}, {37, 0}, {18, 1}}));
+  }
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<int>>();
+  test<std::deque<int>, std::vector<int>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<int>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+  {
+    // Items are forwarded correctly from the input range (P2767).
+    std::pair<MoveOnly, MoveOnly> a[] = {{3, 3}, {1, 1}, {4, 4}, {1, 1}, {5, 5}};
+    std::flat_map<MoveOnly, MoveOnly> m;
+    m.insert_range(a | std::views::as_rvalue); // move-only elements, so they have to be moved rather than copied
+    std::pair<MoveOnly, MoveOnly> expected[] = {{1, 1}, {3, 3}, {4, 4}, {5, 5}};
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    // The element type of the range doesn't need to be std::pair (P2767).
+    std::pair<int, int> pa[] = {{3, 3}, {1, 1}, {4, 4}, {1, 1}, {5, 5}};
+    std::deque<std::reference_wrapper<std::pair<int, int>>> a(pa, pa + 5); // elements are reference_wrappers
+    std::flat_map<int, int> m;
+    m.insert_range(a);
+    std::pair<int, int> expected[] = {{1, 1}, {3, 3}, {4, 4}, {5, 5}};
+    assert(std::ranges::equal(m, expected));
+  }
+  { // hands insert_range to test_insert_range_exception_guarantee (not defined in this file)
+    auto insert_func = [](auto& m, const auto& newValues) { m.insert_range(newValues); };
+    test_insert_range_exception_guarantee(insert_func);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range_stability.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range_stability.pass.cpp
new file mode 100644
index 000000000000000..fabcb1d216a78a1
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_range_stability.pass.cpp
@@ -0,0 +1,63 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<container-compatible-range<value_type> R>
+//   void insert_range(R&& rg);
+//
+// libc++ uses stable_sort to ensure that flat_map's behavior matches map's,
+// in terms of which duplicate items are kept.
+// This tests a conforming extension.
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <flat_map>
+#include <random>
+#include <ranges>
+#include <map>
+#include <vector>
+#include <utility>
+
+#include "test_macros.h"
+
+struct Mod256 { // orders ints by their remainder modulo 256, so distinct values can be equivalent keys
+  bool operator()(int x, int y) const { return (x % 256) < (y % 256); }
+};
+
+int main(int, char**) {
+  { // flat_map::insert_range must keep the same duplicates as std::map::insert (checked for libc++ only)
+    std::mt19937 randomness; // default-constructed, i.e. default-seeded
+    std::pair<uint16_t, uint16_t> pairs[400];
+    for (int i = 0; i < 400; ++i) {
+      uint16_t r = randomness(); // keeps only what fits in 16 bits of the generator output
+      pairs[i]   = {r, r};
+    }
+
+    std::map<uint16_t, uint16_t, Mod256> m(pairs, pairs + 200); // first half seeds both containers
+    std::flat_map<uint16_t, uint16_t, Mod256> fm(std::sorted_unique, m.begin(), m.end());
+    assert(std::ranges::equal(fm, m));
+
+    fm.insert_range(std::views::counted(pairs + 200, 200)); // second half is inserted into both
+    m.insert(pairs + 200, pairs + 400);
+    assert(fm.size() == m.size());
+    LIBCPP_ASSERT(std::ranges::equal(fm, m));
+  }
+
+  { // two elements with the same key: the first one in the range is the one kept (libc++ only)
+    std::vector<std::pair<int, int>> v{{1, 2}, {1, 3}};
+    std::flat_map<int, int> m;
+    m.insert_range(v);
+    assert(m.size() == 1);
+    LIBCPP_ASSERT(m[1] == 2);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_rv.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_rv.pass.cpp
new file mode 100644
index 000000000000000..9ea7a6a6366664a
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_rv.pass.cpp
@@ -0,0 +1,124 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// class flat_map
+
+// pair<iterator, bool> insert(value_type&& v);
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "MoveOnly.h"
+#include "min_allocator.h"
+#include "test_macros.h"
+#include "../helpers.h"
+
+template <class Container, class Pair>
+void do_insert_rv_test() { // inserts rvalue Pair objects into an empty Container; checks position and result
+  using M = Container;
+  using P = Pair;
+  using R = std::pair<typename M::iterator, bool>;
+  M m;
+  std::same_as<R> decltype(auto) r = m.insert(P(2, 2)); // first element
+  assert(r.second);
+  assert(r.first == m.begin());
+  assert(m.size() == 1);
+  assert(r.first->first == 2);
+  assert(r.first->second == 2);
+
+  r = m.insert(P(1, 1)); // smaller key: becomes the new first element
+  assert(r.second);
+  assert(r.first == m.begin());
+  assert(m.size() == 2);
+  assert(r.first->first == 1);
+  assert(r.first->second == 1);
+
+  r = m.insert(P(3, 3)); // larger key: becomes the last element
+  assert(r.second);
+  assert(r.first == std::ranges::prev(m.end()));
+  assert(m.size() == 3);
+  assert(r.first->first == 3);
+  assert(r.first->second == 3);
+
+  r = m.insert(P(3, 3)); // duplicate key: not inserted, the iterator refers to the existing element
+  assert(!r.second);
+  assert(r.first == std::ranges::prev(m.end()));
+  assert(m.size() == 3);
+  assert(r.first->first == 3);
+  assert(r.first->second == 3);
+}
+
+template <class KeyContainer, class ValueContainer>
+void test() { // runs do_insert_rv_test with both pair<Key, Value> and pair<const Key, Value> arguments
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  using P  = std::pair<Key, Value>;
+  using CP = std::pair<const Key, Value>;
+
+  do_insert_rv_test<M, P>();
+  do_insert_rv_test<M, CP>();
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<MoveOnly>>();
+  test<std::deque<int>, std::vector<MoveOnly>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<MoveOnly>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<MoveOnly, min_allocator<MoveOnly>>>();
+
+  { // same insertion sequence as do_insert_rv_test, but passing braced-init-lists to insert
+    using M = std::flat_map<int, MoveOnly>;
+    using R = std::pair<M::iterator, bool>;
+    M m;
+    R r = m.insert({2, MoveOnly(2)}); // first element
+    assert(r.second);
+    assert(r.first == m.begin());
+    assert(m.size() == 1);
+    assert(r.first->first == 2);
+    assert(r.first->second == 2);
+
+    r = m.insert({1, MoveOnly(1)}); // smaller key: becomes the new first element
+    assert(r.second);
+    assert(r.first == m.begin());
+    assert(m.size() == 2);
+    assert(r.first->first == 1);
+    assert(r.first->second == 1);
+
+    r = m.insert({3, MoveOnly(3)}); // larger key: becomes the last element
+    assert(r.second);
+    assert(r.first == std::ranges::prev(m.end()));
+    assert(m.size() == 3);
+    assert(r.first->first == 3);
+    assert(r.first->second == 3);
+
+    r = m.insert({3, MoveOnly(3)}); // duplicate key: not inserted
+    assert(!r.second);
+    assert(r.first == std::ranges::prev(m.end()));
+    assert(m.size() == 3);
+    assert(r.first->first == 3);
+    assert(r.first->second == 3);
+  }
+  { // hands insert(value_type&&) to test_emplace_exception_guarantee (not defined in this file)
+    auto insert_func = [](auto& m, auto key_arg, auto value_arg) {
+      using FlatMap    = std::decay_t<decltype(m)>;
+      using value_type = typename FlatMap::value_type;
+      value_type p(std::piecewise_construct, std::tuple(key_arg), std::tuple(value_arg));
+      m.insert(std::move(p)); // rvalue value_type, i.e. the overload under test
+    };
+    test_emplace_exception_guarantee(insert_func);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_initializer_list.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_initializer_list.pass.cpp
new file mode 100644
index 000000000000000..08d2caf34987916
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_initializer_list.pass.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// void insert(sorted_unique_t, initializer_list<value_type> il);
+
+#include <flat_map>
+#include <cassert>
+#include <functional>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // inserts a sorted_unique initializer_list into a map that already holds one of its keys
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  using V = std::pair<const Key, Value>;
+  M m     = {{1, 1}, {1, 1.5}, {1, 2}, {3, 1}, {3, 1.5}, {3, 2}}; // duplicate keys collapse to {1, 1} and {3, 1}
+  m.insert(std::sorted_unique, // key 1 is already present and keeps its old value
+           {
+               {0, 1},
+               {1, 2},
+               {2, 1},
+               {4, 1},
+           });
+  assert(m.size() == 5);
+  assert(std::distance(m.begin(), m.end()) == 5);
+  assert(*m.begin() == V(0, 1));
+  assert(*std::next(m.begin()) == V(1, 1));
+  assert(*std::next(m.begin(), 2) == V(2, 1));
+  assert(*std::next(m.begin(), 3) == V(3, 1));
+  assert(*std::next(m.begin(), 4) == V(4, 1));
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  { // hands the sorted_unique initializer_list overload to test_insert_range_exception_guarantee
+    auto insert_func = [](auto& m, const auto& newValues) {
+      using FlatMap                        = std::decay_t<decltype(m)>;
+      using value_type                     = typename FlatMap::value_type;
+      std::initializer_list<value_type> il = {{newValues[0].first, newValues[0].second}}; // only newValues[0]
+      m.insert(std::sorted_unique, il);
+    };
+    test_insert_range_exception_guarantee(insert_func);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_iter_iter.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_iter_iter.pass.cpp
new file mode 100644
index 000000000000000..18a3b571a419949
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_sorted_iter_iter.pass.cpp
@@ -0,0 +1,86 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template <class InputIterator>
+//   void insert(sorted_unique_t, InputIterator first, InputIterator last);
+
+#include <flat_map>
+#include <cassert>
+#include <functional>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "test_iterators.h"
+#include "min_allocator.h"
+
+// Constraint test for the InputIterator parameter of insert(sorted_unique, first, last).
+template <class M, class... Args>
+concept CanInsert = requires(M m, Args&&... args) { m.insert(std::forward<Args>(args)...); };
+
+using Map  = std::flat_map<int, int>;
+using Pair = std::pair<int, int>;
+
+static_assert(CanInsert<Map, std::sorted_unique_t, Pair*, Pair*>); // raw pointers are accepted
+static_assert(CanInsert<Map, std::sorted_unique_t, cpp17_input_iterator<Pair*>, cpp17_input_iterator<Pair*>>);
+static_assert(!CanInsert<Map, std::sorted_unique_t, int, int>); // int is not an iterator
+static_assert(!CanInsert<Map, std::sorted_unique_t, cpp20_input_iterator<Pair*>, cpp20_input_iterator<Pair*>>);
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks insert(sorted_unique, first, last) using cpp17_input_iterator over arrays of pairs.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using P     = std::pair<Key, Value>;
+
+  P ar1[] = {
+      P(1, 1),
+      P(2, 1),
+      P(3, 1),
+  };
+
+  P ar2[] = {
+      P(0, 1),
+      P(2, 2), // key 2 is also in ar1; expected2 below keeps the earlier value 1
+      P(4, 1),
+  };
+
+  M m;
+  m.insert(
+      std::sorted_unique, cpp17_input_iterator<P*>(ar1), cpp17_input_iterator<P*>(ar1 + sizeof(ar1) / sizeof(ar1[0])));
+  assert(m.size() == 3);
+  M expected{{1, 1}, {2, 1}, {3, 1}};
+  assert(m == expected);
+
+  m.insert(
+      std::sorted_unique, cpp17_input_iterator<P*>(ar2), cpp17_input_iterator<P*>(ar2 + sizeof(ar2) / sizeof(ar2[0])));
+  assert(m.size() == 5); // only keys 0 and 4 are new
+  M expected2{{0, 1}, {1, 1}, {2, 1}, {3, 1}, {4, 1}};
+  assert(m == expected2);
+}
+
+int main(int, char**) { // Runs test() over several container combinations, then an exception-guarantee check.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    auto insert_func = [](auto& m, const auto& newValues) { // forwards the whole range to the iterator-pair overload
+      m.insert(std::sorted_unique, newValues.begin(), newValues.end());
+    };
+    test_insert_range_exception_guarantee(insert_func); // helper is defined outside this file (presumably ../helpers.h)
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_transparent.pass.cpp
new file mode 100644
index 000000000000000..75cabb70630f325
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/insert_transparent.pass.cpp
@@ -0,0 +1,167 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class P> pair<iterator, bool> insert(P&& x);
+// template<class P> iterator insert(const_iterator hint, P&& x);
+
+#include <algorithm>
+#include <compare>
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <tuple>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "test_iterators.h"
+#include "min_allocator.h"
+
+// Constraints: is_constructible_v<pair<key_type, mapped_type>, P> is true.
+template <class M, class... Args>
+concept CanInsert = requires(M m, Args&&... args) { m.insert(std::forward<Args>(args)...); };
+
+using Map  = std::flat_map<int, double>;
+using Iter = Map::const_iterator;
+
+static_assert(CanInsert<Map, std::pair<short, double>&&>); // pair with a different key type
+static_assert(CanInsert<Map, Iter, std::pair<short, double>&&>);
+static_assert(CanInsert<Map, std::tuple<short, double>&&>); // tuple instead of pair
+static_assert(CanInsert<Map, Iter, std::tuple<short, double>&&>);
+static_assert(!CanInsert<Map, int>); // a lone int is rejected
+static_assert(!CanInsert<Map, Iter, int>);
+
+static int expensive_comparisons = 0; // bumped by CompareCounter <=> CompareCounter
+static int cheap_comparisons     = 0; // bumped by CompareCounter <=> int
+
+struct CompareCounter { // Key type that records which comparison operator was used via the two global counters.
+  int i_ = 0;
+  CompareCounter(int i) : i_(i) {} // implicit, so int keys convert to CompareCounter
+  friend auto operator<=>(const CompareCounter& x, const CompareCounter& y) {
+    expensive_comparisons += 1;
+    return x.i_ <=> y.i_;
+  }
+  bool operator==(const CompareCounter&) const = default;
+  friend auto operator<=>(const CompareCounter& x, int y) { // heterogeneous comparison against a raw int
+    cheap_comparisons += 1;
+    return x.i_ <=> y;
+  }
+};
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Inserts key 3 between 2 and 4 in several ways and checks which comparison counter was bumped.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  const std::pair<int, int> expected[] = {{1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}}; // contents after each insertion
+  {
+    // insert(P&&)
+    //   Unlike flat_set, here we can't use key_compare to compare value_type versus P,
+    //   so we must eagerly convert to value_type.
+    M m                   = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    expensive_comparisons = 0; // reset after construction so only the insert is counted
+    cheap_comparisons     = 0;
+    std::same_as<std::pair<typename M::iterator, bool>> auto p =
+        m.insert(std::make_pair(3, 3)); // conversion happens first
+    assert(expensive_comparisons >= 2);
+    assert(cheap_comparisons == 0);
+    assert(p == std::make_pair(m.begin() + 2, true));
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    // insert(const_iterator, P&&)
+    M m                                        = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    expensive_comparisons                      = 0;
+    cheap_comparisons                          = 0;
+    std::same_as<typename M::iterator> auto it = m.insert(m.begin(), std::make_pair(3, 3)); // m.begin() is not the insertion point
+    assert(expensive_comparisons >= 2);
+    assert(cheap_comparisons == 0);
+    assert(it == m.begin() + 2);
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    // insert(value_type&&)
+    // NOTE(review): same call as the insert(P&&) case above; std::make_pair(3, 3) is pair<int, int> — confirm intent
+    M m                   = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    expensive_comparisons = 0;
+    cheap_comparisons     = 0;
+    std::same_as<std::pair<typename M::iterator, bool>> auto p =
+        m.insert(std::make_pair(3, 3)); // conversion happens last
+    assert(expensive_comparisons >= 2);
+    assert(cheap_comparisons == 0);
+    assert(p == std::make_pair(m.begin() + 2, true));
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    // insert(const_iterator, value_type&&)
+    // NOTE(review): same call as the insert(const_iterator, P&&) case above — confirm intent
+    M m                                        = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    expensive_comparisons                      = 0;
+    cheap_comparisons                          = 0;
+    std::same_as<typename M::iterator> auto it = m.insert(m.begin(), std::make_pair(3, 3));
+    assert(expensive_comparisons >= 2);
+    assert(cheap_comparisons == 0);
+    assert(it == m.begin() + 2);
+    assert(std::ranges::equal(m, expected));
+  }
+  {
+    // emplace(Args&&...)
+    M m                   = {{1, 1}, {2, 2}, {4, 4}, {5, 5}};
+    expensive_comparisons = 0;
+    cheap_comparisons     = 0;
+    std::same_as<std::pair<typename M::iterator, bool>> auto p =
+        m.emplace(std::make_pair(3, 3)); // conversion happens first
+    assert(expensive_comparisons >= 2);
+    assert(cheap_comparisons == 0);
+    assert(p == std::make_pair(m.begin() + 2, true));
+    assert(std::ranges::equal(m, expected));
+  }
+}
+
+int main(int, char**) { // Runs test() with CompareCounter keys, an overload-resolution check, and exception checks.
+  test<std::vector<CompareCounter>, std::vector<double>>();
+  test<std::deque<CompareCounter>, std::vector<double>>();
+  test<MinSequenceContainer<CompareCounter>, MinSequenceContainer<double>>();
+  test<std::vector<CompareCounter, min_allocator<CompareCounter>>, std::vector<double, min_allocator<double>>>();
+
+  {
+    // no ambiguity between insert(pos, P&&) and insert(first, last)
+    using M = std::flat_map<int, int>;
+    struct Evil { // converts to both value_type and const_iterator; operators are only declared, used in decltype
+      operator M::value_type() const;
+      operator M::const_iterator() const;
+    };
+    std::flat_map<int, int> m;
+    ASSERT_SAME_TYPE(decltype(m.insert(Evil())), std::pair<M::iterator, bool>);
+    ASSERT_SAME_TYPE(decltype(m.insert(m.begin(), Evil())), M::iterator);
+    ASSERT_SAME_TYPE(decltype(m.insert(m.begin(), m.end())), void);
+  }
+  {
+    auto insert_func = [](auto& m, auto key_arg, auto value_arg) { // inserts an lvalue std::tuple (the P&& overload)
+      using FlatMap    = std::decay_t<decltype(m)>;
+      using tuple_type = std::tuple<typename FlatMap::key_type, typename FlatMap::mapped_type>;
+      tuple_type t(key_arg, value_arg);
+      m.insert(t);
+    };
+    test_emplace_exception_guarantee(insert_func); // helper is defined outside this file (presumably ../helpers.h)
+  }
+  {
+    auto insert_func_iter = [](auto& m, auto key_arg, auto value_arg) { // same, through the hinted overload
+      using FlatMap    = std::decay_t<decltype(m)>;
+      using tuple_type = std::tuple<typename FlatMap::key_type, typename FlatMap::mapped_type>;
+      tuple_type t(key_arg, value_arg);
+      m.insert(m.begin(), t);
+    };
+    test_emplace_exception_guarantee(insert_func_iter);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/replace.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/replace.pass.cpp
new file mode 100644
index 000000000000000..5ca811d76152014
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/replace.pass.cpp
@@ -0,0 +1,80 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// void replace(key_container_type&& key_cont, mapped_container_type&& mapped_cont);
+
+#include <algorithm>
+#include <deque>
+#include <concepts>
+#include <flat_map>
+#include <functional>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class T, class... Args> // CanReplace: true when t.replace(args...) is a well-formed expression
+concept CanReplace = requires(T t, Args&&... args) { t.replace(std::forward<Args>(args)...); };
+
+using Map = std::flat_map<int, int>;
+static_assert(CanReplace<Map, std::vector<int>, std::vector<int>>); // both containers passed as rvalues
+static_assert(!CanReplace<Map, const std::vector<int>&, std::vector<int>>); // const lvalue keys rejected
+static_assert(!CanReplace<Map, std::vector<int>, const std::vector<int>&>); // const lvalue values rejected
+static_assert(!CanReplace<Map, const std::vector<int>&, const std::vector<int>&>);
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks that replace() swaps in the new key/value containers wholesale.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  M m                       = M({1, 2, 3}, {4, 5, 6});
+  KeyContainer new_keys     = {7, 8};
+  ValueContainer new_values = {9, 10};
+  auto expected_keys        = new_keys; // copies taken before the originals are moved from
+  auto expected_values      = new_values;
+  m.replace(std::move(new_keys), std::move(new_values));
+  assert(m.size() == 2); // the three old elements are gone
+  assert(std::ranges::equal(m.keys(), expected_keys));
+  assert(std::ranges::equal(m.values(), expected_values));
+}
+
+int main(int, char**) { // Runs test() over several container combinations, then a throwing-move scenario.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  {
+#ifndef TEST_HAS_NO_EXCEPTIONS
+    using KeyContainer   = std::vector<int>;
+    using ValueContainer = ThrowOnMoveContainer<int>; // defined outside this file; presumably throws int on move
+    using M              = std::flat_map<int, int, std::ranges::less, KeyContainer, ValueContainer>;
+
+    M m;
+    m.emplace(1, 1);
+    m.emplace(2, 2);
+    try {
+      KeyContainer new_keys{3, 4};
+      ValueContainer new_values{5, 6};
+      m.replace(std::move(new_keys), std::move(new_values));
+      assert(false); // replace() is expected to throw before reaching here
+    } catch (int) {
+      check_invariant(m); // helper is defined outside this file
+      // In libc++, we clear the map
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+#endif
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_exception.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_exception.pass.cpp
new file mode 100644
index 000000000000000..f9708aac62c7eec
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_exception.pass.cpp
@@ -0,0 +1,78 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+// `check_assertion.h` requires Unix headers and regex support.
+// REQUIRES: has-unix-headers
+// UNSUPPORTED: no-localization
+// UNSUPPORTED: no-exceptions
+
+// <flat_map>
+
+// void swap(flat_map& y) noexcept;
+// friend void swap(flat_map& x, flat_map& y) noexcept;
+
+// Test that std::terminate is called if any exception is thrown during swap
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+#include <functional>
+#include <vector>
+
+#include "test_macros.h"
+#include "../helpers.h"
+#include "check_assertion.h"
+
+template <class F>
+void test_swap_exception_guarantee([[maybe_unused]] F&& swap_function) { // swap_function(m1, m2) performs the swap under test
+  {
+    // key swap throws
+    using KeyContainer   = ThrowOnMoveContainer<int>; // defined outside this file
+    using ValueContainer = std::vector<int>;
+    using M              = std::flat_map<int, int, TransparentComparator, KeyContainer, ValueContainer>;
+
+    M m1, m2;
+    m1.emplace(1, 1);
+    m1.emplace(2, 2);
+    m2.emplace(3, 3);
+    m2.emplace(4, 4);
+    // swap is noexcept
+    EXPECT_STD_TERMINATE([&] { swap_function(m1, m2); }); // macro is not defined here (presumably check_assertion.h)
+  }
+
+  {
+    // value swap throws
+    using KeyContainer   = std::vector<int>;
+    using ValueContainer = ThrowOnMoveContainer<int>;
+    using M              = std::flat_map<int, int, TransparentComparator, KeyContainer, ValueContainer>;
+
+    M m1, m2;
+    m1.emplace(1, 1);
+    m1.emplace(2, 2);
+    m2.emplace(3, 3);
+    m2.emplace(4, 4);
+
+    // swap is noexcept
+    EXPECT_STD_TERMINATE([&] { swap_function(m1, m2); });
+  }
+}
+
+int main(int, char**) { // Runs the terminate-on-throw check for both the free and the member swap.
+  {
+    auto swap_func = [](auto& m1, auto& m2) { swap(m1, m2); }; // unqualified call, found by ADL
+    test_swap_exception_guarantee(swap_func);
+  }
+
+  {
+    auto swap_func = [](auto& m1, auto& m2) { m1.swap(m2); }; // member swap
+    test_swap_exception_guarantee(swap_func);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_free.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_free.pass.cpp
new file mode 100644
index 000000000000000..98c60c1488cf532
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_free.pass.cpp
@@ -0,0 +1,97 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// friend void swap(flat_map& x, flat_map& y) noexcept;
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "MoveOnly.h"
+#include "min_allocator.h"
+#include "test_macros.h"
+#include "../helpers.h"
+
+// Compile-time check that the ADL-found swap(flat_map&, flat_map&) is noexcept.
+
+template <class T>
+concept NoExceptAdlSwap = requires(T t1, T t2) {
+  { swap(t1, t2) } noexcept;
+};
+
+static_assert(NoExceptAdlSwap<std::flat_map<int, int>>);
+
+#ifndef TEST_HAS_NO_EXCEPTIONS
+static_assert( // must hold even with ThrowOnMoveContainer as both underlying containers
+    NoExceptAdlSwap<std::flat_map<int, int, std::less<int>, ThrowOnMoveContainer<int>, ThrowOnMoveContainer<int>>>);
+#endif
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks that free swap(m1, m2) exchanges contents for empty/non-empty combinations.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using V     = std::pair<const Key, Value>;
+
+  { // both maps empty
+    M m1;
+    M m2;
+    M m1_save = m1;
+    M m2_save = m2;
+    swap(m1, m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+  { // m1 empty, m2 has eight elements
+    V ar2[] = {V(5, 5), V(6, 6), V(7, 7), V(8, 8), V(9, 9), V(10, 10), V(11, 11), V(12, 12)};
+    M m1;
+    M m2(ar2, ar2 + sizeof(ar2) / sizeof(ar2[0]));
+    M m1_save = m1;
+    M m2_save = m2;
+    swap(m1, m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+  { // m1 has four elements, m2 empty
+    V ar1[] = {V(1, 1), V(2, 2), V(3, 3), V(4, 4)};
+    M m1(ar1, ar1 + sizeof(ar1) / sizeof(ar1[0]));
+    M m2;
+    M m1_save = m1;
+    M m2_save = m2;
+    swap(m1, m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+  { // both non-empty, different sizes
+    V ar1[] = {V(1, 1), V(2, 2), V(3, 3), V(4, 4)};
+    V ar2[] = {V(5, 5), V(6, 6), V(7, 7), V(8, 8), V(9, 9), V(10, 10), V(11, 11), V(12, 12)};
+    M m1(ar1, ar1 + sizeof(ar1) / sizeof(ar1[0]));
+    M m2(ar2, ar2 + sizeof(ar2) / sizeof(ar2[0]));
+    M m1_save = m1;
+    M m2_save = m2;
+    swap(m1, m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+}
+
+int main(int, char**) { // Runs test() over several key/value container combinations.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_member.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_member.pass.cpp
new file mode 100644
index 000000000000000..d2d8f5673edeb43
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/swap_member.pass.cpp
@@ -0,0 +1,95 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// void swap(flat_map& y) noexcept;
+
+#include <flat_map>
+#include <cassert>
+#include <deque>
+#include <functional>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "MoveOnly.h"
+#include "min_allocator.h"
+#include "test_macros.h"
+#include "../helpers.h"
+
+// Compile-time check that the member flat_map::swap is noexcept.
+
+template <class T>
+concept NoExceptMemberSwap = requires(T t1, T t2) {
+  { t1.swap(t2) } noexcept;
+};
+
+static_assert(NoExceptMemberSwap<std::flat_map<int, int>>);
+#ifndef TEST_HAS_NO_EXCEPTIONS
+static_assert( // must hold even with ThrowOnMoveContainer as both underlying containers
+    NoExceptMemberSwap<std::flat_map<int, int, std::less<int>, ThrowOnMoveContainer<int>, ThrowOnMoveContainer<int>>>);
+#endif
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks that m1.swap(m2) exchanges contents for empty/non-empty combinations.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+  using V     = std::pair<const Key, Value>;
+  { // both maps empty
+    M m1;
+    M m2;
+    M m1_save = m1;
+    M m2_save = m2;
+    m1.swap(m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+  { // m1 empty, m2 has eight elements
+    V ar2[] = {V(5, 5), V(6, 6), V(7, 7), V(8, 8), V(9, 9), V(10, 10), V(11, 11), V(12, 12)};
+    M m1;
+    M m2(ar2, ar2 + sizeof(ar2) / sizeof(ar2[0]));
+    M m1_save = m1;
+    M m2_save = m2;
+    m1.swap(m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+  { // m1 has four elements, m2 empty
+    V ar1[] = {V(1, 1), V(2, 2), V(3, 3), V(4, 4)};
+    M m1(ar1, ar1 + sizeof(ar1) / sizeof(ar1[0]));
+    M m2;
+    M m1_save = m1;
+    M m2_save = m2;
+    m1.swap(m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+  { // both non-empty, different sizes
+    V ar1[] = {V(1, 1), V(2, 2), V(3, 3), V(4, 4)};
+    V ar2[] = {V(5, 5), V(6, 6), V(7, 7), V(8, 8), V(9, 9), V(10, 10), V(11, 11), V(12, 12)};
+    M m1(ar1, ar1 + sizeof(ar1) / sizeof(ar1[0]));
+    M m2(ar2, ar2 + sizeof(ar2) / sizeof(ar2[0]));
+    M m1_save = m1;
+    M m2_save = m2;
+    m1.swap(m2);
+    assert(m1 == m2_save);
+    assert(m2 == m1_save);
+  }
+}
+
+int main(int, char**) { // Runs test() over several key/value container combinations.
+  test<std::vector<int>, std::vector<double>>();
+  test<std::deque<int>, std::vector<double>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<double>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<double, min_allocator<double>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace.pass.cpp
new file mode 100644
index 000000000000000..4be2fe1c4333e02
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace.pass.cpp
@@ -0,0 +1,246 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class... Args>
+//   pair<iterator, bool> try_emplace(const key_type& k, Args&&... args);
+// template<class... Args>
+//   pair<iterator, bool> try_emplace(key_type&& k, Args&&... args);
+// template<class... Args>
+//   iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args);
+// template<class... Args>
+//   iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args);
+
+#include <flat_map>
+#include <cassert>
+#include <functional>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "../helpers.h"
+#include "min_allocator.h"
+#include "../../../Emplaceable.h"
+
+// Constraints: is_constructible_v<mapped_type, Args...> is true.
+template <class M, class... Args>
+concept CanTryEmplace = requires(M m, Args&&... args) { m.try_emplace(std::forward<Args>(args)...); };
+
+using Map  = std::flat_map<Emplaceable, Emplaceable>;
+using Iter = typename Map::const_iterator;
+static_assert(!CanTryEmplace<Map>); // no key argument at all
+
+static_assert(CanTryEmplace<Map, const Emplaceable&>); // const key_type& overload
+static_assert(CanTryEmplace<Map, const Emplaceable&, Emplaceable>);
+static_assert(CanTryEmplace<Map, const Emplaceable&, int, double>);
+static_assert(!CanTryEmplace<Map, const Emplaceable&, const Emplaceable&>); // presumably Emplaceable is not copyable — confirm
+static_assert(!CanTryEmplace<Map, const Emplaceable&, int>);
+
+static_assert(CanTryEmplace<Map, Emplaceable>); // key_type&& overload
+static_assert(CanTryEmplace<Map, Emplaceable, Emplaceable>);
+static_assert(CanTryEmplace<Map, Emplaceable, int, double>);
+static_assert(!CanTryEmplace<Map, Emplaceable, const Emplaceable&>);
+static_assert(!CanTryEmplace<Map, Emplaceable, int>);
+
+static_assert(CanTryEmplace<Map, Iter, const Emplaceable&>); // hinted, const key_type& overload
+static_assert(CanTryEmplace<Map, Iter, const Emplaceable&, Emplaceable>);
+static_assert(CanTryEmplace<Map, Iter, const Emplaceable&, int, double>);
+static_assert(!CanTryEmplace<Map, Iter, const Emplaceable&, const Emplaceable&>);
+static_assert(!CanTryEmplace<Map, Iter, const Emplaceable&, int>);
+
+static_assert(CanTryEmplace<Map, Iter, Emplaceable>); // hinted, key_type&& overload
+static_assert(CanTryEmplace<Map, Iter, Emplaceable, Emplaceable>);
+static_assert(CanTryEmplace<Map, Iter, Emplaceable, int, double>);
+static_assert(!CanTryEmplace<Map, Iter, Emplaceable, const Emplaceable&>);
+static_assert(!CanTryEmplace<Map, Iter, Emplaceable, int>);
+
+template <class KeyContainer, class ValueContainer>
+void test_ck() { // Tests the try_emplace overloads that take the key as const key_type& (with and without a hint).
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  { // pair<iterator, bool> try_emplace(const key_type& k, Args&&... args);
+    using R = std::pair<typename M::iterator, bool>;
+    M m;
+    for (int i = 0; i < 20; i += 2) // seed with the even keys 0..18
+      m.emplace(i, Moveable(i, (double)i));
+
+    assert(m.size() == 10);
+
+    Moveable mv1(3, 3.0);
+    for (int i = 0; i < 20; i += 2) { // every key already exists, so nothing may be inserted or moved from
+      std::same_as<R> decltype(auto) r = m.try_emplace(i, std::move(mv1));
+      assert(m.size() == 10);
+      assert(!r.second);           // was not inserted
+      assert(!mv1.moved());        // was not moved from
+      assert(r.first->first == i); // key
+    }
+
+    std::same_as<R> decltype(auto) r2 = m.try_emplace(-1, std::move(mv1));
+    assert(m.size() == 11);
+    assert(r2.second);                   // was inserted
+    assert(mv1.moved());                 // was moved from
+    assert(r2.first->first == -1);       // key
+    assert(r2.first->second.get() == 3); // value
+
+    Moveable mv2(5, 3.0);
+    std::same_as<R> decltype(auto) r3 = m.try_emplace(5, std::move(mv2));
+    assert(m.size() == 12);
+    assert(r3.second);                   // was inserted
+    assert(mv2.moved());                 // was moved from
+    assert(r3.first->first == 5);        // key
+    assert(r3.first->second.get() == 5); // value
+
+    Moveable mv3(-1, 3.0);
+    std::same_as<R> decltype(auto) r4 = m.try_emplace(117, std::move(mv3)); // use the fresh mv3, not the moved-from mv2
+    assert(m.size() == 13);
+    assert(r4.second);                    // was inserted
+    assert(mv3.moved());                  // was moved from
+    assert(r4.first->first == 117);       // key
+    assert(r4.first->second.get() == -1); // value
+  }
+
+  { // iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args);
+    using R = typename M::iterator;
+    M m;
+    for (int i = 0; i < 20; i += 2) // seed with the even keys 0..18
+      m.try_emplace(i, Moveable(i, (double)i));
+    assert(m.size() == 10);
+    typename M::const_iterator it = m.find(2); // fixed hint reused for every call below
+
+    Moveable mv1(3, 3.0);
+    for (int i = 0; i < 20; i += 2) { // existing keys: the stored value must be left untouched
+      std::same_as<R> decltype(auto) r1 = m.try_emplace(it, i, std::move(mv1));
+      assert(m.size() == 10);
+      assert(!mv1.moved());          // was not moved from
+      assert(r1->first == i);        // key
+      assert(r1->second.get() == i); // value
+    }
+
+    std::same_as<R> decltype(auto) r2 = m.try_emplace(it, 3, std::move(mv1));
+    assert(m.size() == 11);
+    assert(mv1.moved());           // was moved from
+    assert(r2->first == 3);        // key
+    assert(r2->second.get() == 3); // value
+  }
+}
+
+template <class KeyContainer, class ValueContainer>
+void test_rk() { // Tests the try_emplace overloads that take the key as key_type&& (with and without a hint).
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  { // pair<iterator, bool> try_emplace(key_type&& k, Args&&... args);
+    using R = std::pair<typename M::iterator, bool>;
+    M m;
+    for (int i = 0; i < 20; i += 2) { // seed with Moveable keys built from the even numbers 0..18
+      m.emplace(Moveable(i, (double)i), Moveable(i + 1, (double)i + 1));
+    }
+    assert(m.size() == 10);
+
+    Moveable mvkey1(2, 2.0); // compares equal to a key that is already present
+    Moveable mv1(4, 4.0);
+    std::same_as<R> decltype(auto) r1 = m.try_emplace(std::move(mvkey1), std::move(mv1));
+    assert(m.size() == 10);
+    assert(!r1.second);                // was not inserted
+    assert(!mv1.moved());              // was not moved from
+    assert(!mvkey1.moved());           // was not moved from
+    assert(r1.first->first == mvkey1); // key
+
+    Moveable mvkey2(3, 3.0); // new key
+    std::same_as<R> decltype(auto) r2 = m.try_emplace(std::move(mvkey2), std::move(mv1));
+    assert(m.size() == 11);
+    assert(r2.second);                   // was inserted
+    assert(mv1.moved());                 // was moved from
+    assert(mvkey2.moved());              // was moved from
+    assert(r2.first->first.get() == 3);  // key
+    assert(r2.first->second.get() == 4); // value
+  }
+
+  { // iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args);
+    using R = typename M::iterator;
+    M m;
+    for (int i = 0; i < 20; i += 2)
+      m.emplace(Moveable(i, (double)i), Moveable(i + 1, (double)i + 1));
+    assert(m.size() == 10);
+    typename M::const_iterator it = std::next(m.cbegin()); // hint: second element
+
+    Moveable mvkey1(2, 2.0); // compares equal to a key that is already present
+    Moveable mv1(4, 4.0);
+    std::same_as<R> decltype(auto) r1 = m.try_emplace(it, std::move(mvkey1), std::move(mv1));
+    assert(m.size() == 10);
+    assert(!mv1.moved());        // was not moved from
+    assert(!mvkey1.moved());     // was not moved from
+    assert(r1->first == mvkey1); // key
+
+    Moveable mvkey2(3, 3.0); // new key
+    std::same_as<R> decltype(auto) r2 = m.try_emplace(it, std::move(mvkey2), std::move(mv1));
+    assert(m.size() == 11);
+    assert(mv1.moved());           // was moved from
+    assert(mvkey2.moved());        // was moved from
+    assert(r2->first.get() == 3);  // key
+    assert(r2->second.get() == 4); // value
+  }
+}
+
+int main(int, char**) { // Runs test_ck/test_rk over container combinations, then four exception-guarantee checks.
+  test_ck<std::vector<int>, std::vector<Moveable>>();
+  test_ck<std::deque<int>, std::vector<Moveable>>();
+  test_ck<MinSequenceContainer<int>, MinSequenceContainer<Moveable>>();
+  test_ck<std::vector<int, min_allocator<int>>, std::vector<Moveable, min_allocator<Moveable>>>();
+
+  test_rk<std::vector<Moveable>, std::vector<Moveable>>();
+  test_rk<std::deque<Moveable>, std::vector<Moveable>>();
+  test_rk<MinSequenceContainer<Moveable>, MinSequenceContainer<Moveable>>();
+  test_rk<std::vector<Moveable, min_allocator<Moveable>>, std::vector<Moveable, min_allocator<Moveable>>>();
+
+  {
+    auto try_emplace_ck = [](auto& m, auto key_arg, auto value_arg) { // key passed as a const lvalue
+      using M   = std::decay_t<decltype(m)>;
+      using Key = typename M::key_type;
+      const Key key{key_arg};
+      m.try_emplace(key, value_arg);
+    };
+    test_emplace_exception_guarantee(try_emplace_ck); // helper is defined outside this file (presumably ../helpers.h)
+  }
+
+  {
+    auto try_emplace_rk = [](auto& m, auto key_arg, auto value_arg) { // key passed as a temporary
+      using M   = std::decay_t<decltype(m)>;
+      using Key = typename M::key_type;
+      m.try_emplace(Key{key_arg}, value_arg);
+    };
+    test_emplace_exception_guarantee(try_emplace_rk);
+  }
+
+  {
+    auto try_emplace_iter_ck = [](auto& m, auto key_arg, auto value_arg) { // hinted, key passed as a const lvalue
+      using M   = std::decay_t<decltype(m)>;
+      using Key = typename M::key_type;
+      const Key key{key_arg};
+      m.try_emplace(m.begin(), key, value_arg);
+    };
+    test_emplace_exception_guarantee(try_emplace_iter_ck);
+  }
+
+  {
+    auto try_emplace_iter_rk = [](auto& m, auto key_arg, auto value_arg) { // hinted, key passed as a temporary
+      using M   = std::decay_t<decltype(m)>;
+      using Key = typename M::key_type;
+      m.try_emplace(m.begin(), Key{key_arg}, value_arg);
+    };
+    test_emplace_exception_guarantee(try_emplace_iter_rk);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace_transparent.pass.cpp
new file mode 100644
index 000000000000000..21fda437809674b
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.modifiers/try_emplace_transparent.pass.cpp
@@ -0,0 +1,182 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class K, class... Args>
+//   pair<iterator, bool> try_emplace(K&& k, Args&&... args);
+// template<class K, class... Args>
+//   iterator try_emplace(const_iterator hint, K&& k, Args&&... args);
+
+#include <flat_map>
+#include <cassert>
+#include <functional>
+#include <deque>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "../helpers.h"
+#include "min_allocator.h"
+#include "../../../Emplaceable.h"
+
+// Constraints:
+// - The qualified-id Compare::is_transparent is valid and denotes a type.
+// - is_constructible_v<key_type, K> and is_constructible_v<mapped_type, Args...> are both true.
+// - For the first overload, is_convertible_v<K&&, const_iterator> and is_convertible_v<K&&, iterator> are both false.
+// NOTE(review): each group below repeats its `ConvertibleTransparent<int>, int` negative check; one copy suffices.
+template <class M, class... Args>
+concept CanTryEmplace = requires(M m, Args&&... args) { m.try_emplace(std::forward<Args>(args)...); };
+
+using TransparentMap    = std::flat_map<int, Emplaceable, TransparentComparator>;
+using NonTransparentMap = std::flat_map<int, Emplaceable, NonTransparentComparator>;
+
+using TransparentMapIter      = typename TransparentMap::iterator;
+using TransparentMapConstIter = typename TransparentMap::const_iterator;
+
+static_assert(!CanTryEmplace<TransparentMap>);    // a key argument is required
+static_assert(!CanTryEmplace<NonTransparentMap>); // a key argument is required
+
+static_assert(CanTryEmplace<TransparentMap, ConvertibleTransparent<int>>);
+static_assert(CanTryEmplace<TransparentMap, ConvertibleTransparent<int>, Emplaceable>);
+static_assert(CanTryEmplace<TransparentMap, ConvertibleTransparent<int>, int, double>);
+static_assert(!CanTryEmplace<TransparentMap, ConvertibleTransparent<int>, const Emplaceable&>);
+static_assert(!CanTryEmplace<TransparentMap, ConvertibleTransparent<int>, int>);
+static_assert(!CanTryEmplace<TransparentMap, NonConvertibleTransparent<int>, Emplaceable>);
+static_assert(!CanTryEmplace<NonTransparentMap, NonConvertibleTransparent<int>, Emplaceable>);
+static_assert(!CanTryEmplace<TransparentMap, ConvertibleTransparent<int>, int>);
+static_assert(!CanTryEmplace<TransparentMap, TransparentMapIter, Emplaceable>);
+static_assert(!CanTryEmplace<TransparentMap, TransparentMapConstIter, Emplaceable>);
+
+static_assert(CanTryEmplace<TransparentMap, TransparentMapConstIter, ConvertibleTransparent<int>>);
+static_assert(CanTryEmplace<TransparentMap, TransparentMapConstIter, ConvertibleTransparent<int>, Emplaceable>);
+static_assert(CanTryEmplace<TransparentMap, TransparentMapConstIter, ConvertibleTransparent<int>, int, double>);
+static_assert(!CanTryEmplace<TransparentMap, TransparentMapConstIter, ConvertibleTransparent<int>, const Emplaceable&>);
+static_assert(!CanTryEmplace<TransparentMap, TransparentMapConstIter, ConvertibleTransparent<int>, int>);
+static_assert(!CanTryEmplace<TransparentMap, TransparentMapConstIter, NonConvertibleTransparent<int>, Emplaceable>);
+static_assert(!CanTryEmplace<NonTransparentMap, TransparentMapConstIter, NonConvertibleTransparent<int>, Emplaceable>);
+static_assert(!CanTryEmplace<TransparentMap, TransparentMapConstIter, ConvertibleTransparent<int>, int>);
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+  // Covers both heterogeneous try_emplace overloads; every probe key is a ConvertibleTransparent<int>.
+  { // pair<iterator, bool> try_emplace(K&& k, Args&&... args);
+    using R = std::pair<typename M::iterator, bool>;
+    M m;
+    for (int i = 0; i < 20; i += 2)
+      m.emplace(i, Moveable(i, (double)i));
+
+    assert(m.size() == 10);
+
+    Moveable mv1(3, 3.0);
+    for (int i = 0; i < 20; i += 2) { // every even key is already present
+      std::same_as<R> decltype(auto) r = m.try_emplace(ConvertibleTransparent<int>{i}, std::move(mv1));
+      assert(m.size() == 10);
+      assert(!r.second);           // was not inserted
+      assert(!mv1.moved());        // was not moved from
+      assert(r.first->first == i); // key
+    }
+
+    std::same_as<R> decltype(auto) r2 = m.try_emplace(ConvertibleTransparent<int>{-1}, std::move(mv1));
+    assert(m.size() == 11);
+    assert(r2.second);                   // was inserted
+    assert(mv1.moved());                 // was moved from
+    assert(r2.first->first == -1);       // key
+    assert(r2.first->second.get() == 3); // value
+
+    Moveable mv2(5, 3.0);
+    std::same_as<R> decltype(auto) r3 = m.try_emplace(ConvertibleTransparent<int>{5}, std::move(mv2));
+    assert(m.size() == 12);
+    assert(r3.second);                   // was inserted
+    assert(mv2.moved());                 // was moved from
+    assert(r3.first->first == 5);        // key
+    assert(r3.first->second.get() == 5); // value
+    // Insert from a fresh object: mv2 was already consumed by the insertion above.
+    Moveable mv3(-1, 3.0);
+    std::same_as<R> decltype(auto) r4 = m.try_emplace(ConvertibleTransparent<int>{117}, std::move(mv3));
+    assert(m.size() == 13);
+    assert(r4.second);                    // was inserted
+    assert(mv3.moved());                  // was moved from
+    assert(r4.first->first == 117);       // key
+    assert(r4.first->second.get() == -1); // value
+  }
+
+  { // iterator try_emplace(const_iterator hint, K&& k, Args&&... args);
+    using R = typename M::iterator;
+    M m;
+    for (int i = 0; i < 20; i += 2)
+      m.try_emplace(i, Moveable(i, (double)i));
+    assert(m.size() == 10);
+    typename M::const_iterator it = m.find(2); // hint reused for every call below
+
+    Moveable mv1(3, 3.0);
+    for (int i = 0; i < 20; i += 2) { // every even key is already present
+      std::same_as<R> decltype(auto) r1 = m.try_emplace(it, ConvertibleTransparent<int>{i}, std::move(mv1));
+      assert(m.size() == 10);
+      assert(!mv1.moved());          // was not moved from
+      assert(r1->first == i);        // key
+      assert(r1->second.get() == i); // value
+    }
+
+    std::same_as<R> decltype(auto) r2 = m.try_emplace(it, ConvertibleTransparent<int>{3}, std::move(mv1));
+    assert(m.size() == 11);
+    assert(mv1.moved());           // was moved from
+    assert(r2->first == 3);        // key
+    assert(r2->second.get() == 3); // value
+  }
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<Moveable>>();
+  test<std::deque<int>, std::vector<Moveable>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<Moveable>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<Moveable, min_allocator<Moveable>>>();
+
+  { // non-hinted overload: the flag handed to the comparator must be set by the call
+    bool used_transparent = false;
+    TransparentComparator cmp(used_transparent);
+    std::flat_map<int, int, TransparentComparator> fm(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, cmp);
+    assert(!used_transparent);
+    auto res = fm.try_emplace(ConvertibleTransparent<int>{3}, 3);
+    assert(!res.second);
+    assert(used_transparent);
+  }
+  { // hinted overload: same check, with begin() as the hint
+    bool used_transparent = false;
+    TransparentComparator cmp(used_transparent);
+    std::flat_map<int, int, TransparentComparator> fm(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, cmp);
+    assert(!used_transparent);
+    auto pos = fm.try_emplace(fm.begin(), ConvertibleTransparent<int>{3}, 3);
+    assert(pos->second == 3);
+    assert(used_transparent);
+  }
+  { // exception guarantee, non-hinted overload
+    auto emplace_fn = [](auto& fm, auto key_arg, auto value_arg) {
+      using Map = std::decay_t<decltype(fm)>;
+      using K   = typename Map::key_type;
+      fm.try_emplace(ConvertibleTransparent<K>{key_arg}, value_arg);
+    };
+    test_emplace_exception_guarantee(emplace_fn);
+  }
+
+  { // exception guarantee, hinted overload
+    auto hinted_emplace_fn = [](auto& fm, auto key_arg, auto value_arg) {
+      using Map = std::decay_t<decltype(fm)>;
+      using K   = typename Map::key_type;
+      fm.try_emplace(fm.begin(), ConvertibleTransparent<K>{key_arg}, value_arg);
+    };
+    test_emplace_exception_guarantee(hinted_emplace_fn);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/comp.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/comp.pass.cpp
new file mode 100644
index 000000000000000..d86224952dee453
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/comp.pass.cpp
@@ -0,0 +1,96 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// key_compare key_comp() const;
+// value_compare value_comp() const;
+
+#include <cassert>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+
+#include "test_macros.h"
+
+int main(int, char**) {
+  { // default comparator
+    using Map  = std::flat_map<int, char>;
+    using Less = std::less<int>; // the default
+    Map fm     = {};
+    ASSERT_SAME_TYPE(Map::key_compare, Less);
+    static_assert(!std::is_same_v<Map::value_compare, Less>);
+    ASSERT_SAME_TYPE(decltype(fm.key_comp()), Less);
+    ASSERT_SAME_TYPE(decltype(fm.value_comp()), Map::value_compare);
+    Less key_cmp = fm.key_comp();
+    assert(key_cmp(1, 2));
+    assert(!key_cmp(2, 1));
+    auto val_cmp = fm.value_comp();
+    ASSERT_SAME_TYPE(decltype(val_cmp(std::make_pair(1, 2), std::make_pair(1, 2))), bool);
+    assert(val_cmp({1, '2'}, {2, '1'}));
+    assert(!val_cmp({2, '1'}, {1, '2'}));
+  }
+  { // std::function comparator holding std::greater<int>
+    using Cmp = std::function<bool(int, int)>;
+    using Map = std::flat_map<int, int, Cmp>;
+    Cmp desc  = std::greater<int>();
+    Map fm({}, desc);
+    ASSERT_SAME_TYPE(Map::key_compare, Cmp);
+    ASSERT_SAME_TYPE(decltype(fm.key_comp()), Cmp);
+    ASSERT_SAME_TYPE(decltype(fm.value_comp()), Map::value_compare);
+    Cmp key_cmp = fm.key_comp();
+    assert(!key_cmp(1, 2));
+    assert(key_cmp(2, 1));
+    auto val_cmp = fm.value_comp();
+    auto sample  = std::make_pair(1, 2);
+    ASSERT_SAME_TYPE(decltype(val_cmp(sample, sample)), bool);
+    static_assert(!noexcept(val_cmp(sample, sample)));
+    assert(!val_cmp({1, 2}, {2, 1}));
+    assert(val_cmp({2, 1}, {1, 2}));
+  }
+  { // std::less<> comparator
+    using Cmp = std::less<>;
+    using Map = std::flat_map<int, int, Cmp>;
+    Map fm    = {};
+    ASSERT_SAME_TYPE(Map::key_compare, Cmp);
+    ASSERT_SAME_TYPE(decltype(fm.key_comp()), Cmp);
+    ASSERT_SAME_TYPE(decltype(fm.value_comp()), Map::value_compare);
+    Cmp key_cmp = fm.key_comp();
+    assert(key_cmp(1, 2));
+    assert(!key_cmp(2, 1));
+    auto val_cmp = fm.value_comp();
+    auto sample  = std::make_pair(1, 2);
+    ASSERT_SAME_TYPE(decltype(val_cmp(sample, sample)), bool);
+    assert(val_cmp({1, 2}, {2, 1}));
+    assert(!val_cmp({2, 1}, {1, 2}));
+  }
+  { // value_comp() obtained before the map's comparator is replaced by an empty std::function
+    using Cmp     = std::function<bool(const std::vector<int>&, const std::vector<int>&)>;
+    using Map     = std::flat_map<std::vector<int>, int, Cmp>;
+    Cmp by_second = [idx = 1](const auto& lhs, const auto& rhs) { return lhs[idx] < rhs[idx]; };
+    Map fm({}, by_second);
+    auto val_cmp = fm.value_comp();
+    static_assert(sizeof(val_cmp) >= sizeof(Cmp));
+    by_second = nullptr;
+    fm        = Map({}, nullptr);
+    assert(fm.key_comp() == nullptr);
+    // From here on fm.key_comp() holds no target.
+    // val_cmp was taken earlier and still orders pairs by element [1] of the key.
+    auto p1 = std::make_pair(std::vector<int>{2, 1, 4}, 42);
+    auto p2 = std::make_pair(std::vector<int>{1, 2, 3}, 42);
+    auto p3 = std::make_pair(std::vector<int>{0, 3, 2}, 42);
+    assert(val_cmp(p1, p2));
+    assert(val_cmp(p2, p3));
+    assert(!val_cmp(p2, p1));
+    assert(!val_cmp(p3, p2));
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/keys_values.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/keys_values.pass.cpp
new file mode 100644
index 000000000000000..84d8f8344aaa67e
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.observers/keys_values.pass.cpp
@@ -0,0 +1,57 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// const key_container_type& keys() const noexcept
+// const mapped_container_type& values() const noexcept
+
+#include <algorithm>
+#include <cassert>
+#include <flat_map>
+#include <functional>
+#include <utility>
+#include <vector>
+#include <deque>
+#include <string>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "test_allocator.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using K   = typename KeyContainer::value_type;
+  using V   = typename ValueContainer::value_type;
+  using Map = std::flat_map<K, V, std::less<K>, KeyContainer, ValueContainer>;
+
+  const Map fm = {{4, 'a'}, {2, 'b'}, {3, 'c'}};
+
+  // keys() and values() must both be noexcept
+  static_assert(noexcept(fm.keys()));
+  static_assert(noexcept(fm.values()));
+
+  std::same_as<const KeyContainer&> decltype(auto) ks   = fm.keys();
+  std::same_as<const ValueContainer&> decltype(auto) vs = fm.values();
+  auto sorted_keys                                      = {2, 3, 4};
+  auto mapped_in_key_order                              = {'b', 'c', 'a'};
+  assert(std::ranges::equal(ks, sorted_keys));
+  assert(std::ranges::equal(vs, mapped_in_key_order));
+}
+
+int main(int, char**) { // runs test() once per key/value container pairing listed below
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>(); // key and mapped containers of different kinds
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains.pass.cpp
new file mode 100644
index 000000000000000..208d6138fa68363
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains.pass.cpp
@@ -0,0 +1,70 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// bool contains(const key_type& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using K = typename KeyContainer::value_type;
+  using V = typename ValueContainer::value_type;
+  { // std::less<> ordering; stored keys are 1, 2, 4, 5, 8
+    using Map = std::flat_map<K, V, std::less<>, KeyContainer, ValueContainer>;
+    Map fm    = {{1, 1}, {2, 2}, {4, 4}, {5, 5}, {8, 8}};
+    assert(!fm.contains(0));
+    assert(fm.contains(1));
+    assert(fm.contains(2));
+    assert(!fm.contains(3));
+    assert(fm.contains(4));
+    assert(fm.contains(5));
+    assert(!fm.contains(6));
+    assert(!fm.contains(7));
+    assert(std::as_const(fm).contains(8));  // through a const reference
+    assert(!std::as_const(fm).contains(9)); // through a const reference
+    fm.clear();
+    assert(!fm.contains(1)); // nothing is found once the map is emptied
+  }
+  { // std::greater<int> ordering; same key set
+    using Map = std::flat_map<K, V, std::greater<int>, KeyContainer, ValueContainer>;
+    Map fm    = {{1, 0}, {2, 0}, {4, 0}, {5, 0}, {8, 0}};
+    assert(!fm.contains(0));
+    assert(fm.contains(1));
+    assert(fm.contains(2));
+    assert(!fm.contains(3));
+    assert(fm.contains(4));
+    assert(fm.contains(5));
+    assert(!fm.contains(6));
+    assert(!fm.contains(7));
+    assert(std::as_const(fm).contains(8));  // through a const reference
+    assert(!std::as_const(fm).contains(9)); // through a const reference
+    fm.clear();
+    assert(!fm.contains(1)); // nothing is found once the map is emptied
+  }
+}
+
+int main(int, char**) { // runs test() once per key/value container pairing listed below
+  test<std::vector<int>, std::vector<int>>();
+  test<std::deque<int>, std::vector<int>>(); // key and mapped containers of different kinds
+  test<MinSequenceContainer<int>, MinSequenceContainer<int>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains_transparent.pass.cpp
new file mode 100644
index 000000000000000..0493538ab6dadc7
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/contains_transparent.pass.cpp
@@ -0,0 +1,71 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class K> bool contains(const K& x) const;
+
+#include <cassert>
+#include <flat_map>
+#include <string>
+#include <utility>
+#include <deque>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+template <class M> // detects whether m.contains(k) is well-formed for a Transparent<int> k
+concept CanContains     = requires(M m, Transparent<int> k) { m.contains(k); };
+using TransparentMap    = std::flat_map<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_map<int, double, NonTransparentComparator>;
+static_assert(CanContains<TransparentMap>);           // callable on a non-const map
+static_assert(CanContains<const TransparentMap>);     // and on a const map
+static_assert(!CanContains<NonTransparentMap>);       // Transparent<int> is rejected
+static_assert(!CanContains<const NonTransparentMap>); // Transparent<int> is rejected
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using K     = typename KeyContainer::value_type;
+  using V     = typename ValueContainer::value_type;
+  using Map   = std::flat_map<K, V, TransparentComparator, KeyContainer, ValueContainer>;
+  using Probe = Transparent<std::string>; // lookup argument type that is not key_type
+  Map fm      = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}};
+  ASSERT_SAME_TYPE(decltype(fm.contains(Probe{"abc"})), bool);
+  ASSERT_SAME_TYPE(decltype(std::as_const(fm).contains(Probe{"b"})), bool);
+  assert(fm.contains(Probe{"alpha"}));
+  assert(fm.contains(Probe{"beta"}));
+  assert(fm.contains(Probe{"epsilon"}));
+  assert(fm.contains(Probe{"eta"}));
+  assert(fm.contains(Probe{"gamma"}));
+  assert(!fm.contains(Probe{"al"})); // a prefix of a stored key is not a match
+  assert(!fm.contains(Probe{""}));
+  assert(!fm.contains(Probe{"g"}));
+}
+
+int main(int, char**) {
+  test<std::vector<std::string>, std::vector<int>>();
+  test<std::deque<std::string>, std::vector<int>>();
+  test<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test<std::vector<std::string, min_allocator<std::string>>, std::vector<int, min_allocator<int>>>();
+
+  { // the flag handed to the comparator must be set once contains() sees a Transparent<int> key
+    bool used_transparent = false;
+    TransparentComparator cmp(used_transparent);
+    std::flat_map<int, int, TransparentComparator> fm(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, cmp);
+    assert(!used_transparent);
+    const bool found = fm.contains(Transparent<int>{3});
+    assert(found);
+    assert(used_transparent);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count.pass.cpp
new file mode 100644
index 000000000000000..db675854d5e98b2
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count.pass.cpp
@@ -0,0 +1,69 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// size_type count(const key_type& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using K = typename KeyContainer::value_type;
+  using V = typename ValueContainer::value_type;
+
+  { // std::less<> ordering; stored keys are 1, 2, 4, 5, 8
+    using Map = std::flat_map<K, V, std::less<>, KeyContainer, ValueContainer>;
+    Map fm    = {{1, 1}, {2, 2}, {4, 4}, {5, 5}, {8, 8}};
+    ASSERT_SAME_TYPE(decltype(fm.count(0)), size_t);
+    assert(fm.count(0) == 0);
+    assert(fm.count(1) == 1);
+    assert(fm.count(2) == 1);
+    assert(fm.count(3) == 0);
+    assert(fm.count(4) == 1);
+    assert(fm.count(5) == 1);
+    assert(fm.count(6) == 0);
+    assert(fm.count(7) == 0);
+    assert(std::as_const(fm).count(8) == 1); // through a const reference
+    assert(std::as_const(fm).count(9) == 0); // through a const reference
+  }
+  { // std::greater<int> ordering; same key set
+    using Map = std::flat_map<K, V, std::greater<int>, KeyContainer, ValueContainer>;
+    Map fm    = {{1, 0}, {2, 0}, {4, 0}, {5, 0}, {8, 0}};
+    ASSERT_SAME_TYPE(decltype(fm.count(0)), size_t);
+    assert(fm.count(0) == 0);
+    assert(fm.count(1) == 1);
+    assert(fm.count(2) == 1);
+    assert(fm.count(3) == 0);
+    assert(fm.count(4) == 1);
+    assert(fm.count(5) == 1);
+    assert(fm.count(6) == 0);
+    assert(fm.count(7) == 0);
+    assert(std::as_const(fm).count(8) == 1); // through a const reference
+    assert(std::as_const(fm).count(9) == 0); // through a const reference
+  }
+}
+
+int main(int, char**) { // runs test() once per key/value container pairing listed below
+  test<std::vector<int>, std::vector<int>>();
+  test<std::deque<int>, std::vector<int>>(); // key and mapped containers of different kinds
+  test<MinSequenceContainer<int>, MinSequenceContainer<int>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count_transparent.pass.cpp
new file mode 100644
index 000000000000000..cd195ff1fa8b434
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/count_transparent.pass.cpp
@@ -0,0 +1,72 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class K> size_type count(const K& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <string>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+template <class M> // detects whether m.count(k) is well-formed for a Transparent<int> k
+concept CanCount        = requires(M m, Transparent<int> k) { m.count(k); };
+using TransparentMap    = std::flat_map<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_map<int, double, NonTransparentComparator>;
+static_assert(CanCount<TransparentMap>);           // callable on a non-const map
+static_assert(CanCount<const TransparentMap>);     // and on a const map
+static_assert(!CanCount<NonTransparentMap>);       // Transparent<int> is rejected
+static_assert(!CanCount<const NonTransparentMap>); // Transparent<int> is rejected
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using K     = typename KeyContainer::value_type;
+  using V     = typename ValueContainer::value_type;
+  using Map   = std::flat_map<K, V, TransparentComparator, KeyContainer, ValueContainer>;
+  using Probe = Transparent<std::string>; // lookup argument type that is not key_type
+  Map fm      = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}};
+  ASSERT_SAME_TYPE(decltype(fm.count(Probe{"abc"})), typename Map::size_type);
+  ASSERT_SAME_TYPE(decltype(std::as_const(fm).count(Probe{"b"})), typename Map::size_type);
+  assert(fm.count(Probe{"alpha"}) == 1);
+  assert(fm.count(Probe{"beta"}) == 1);
+  assert(fm.count(Probe{"epsilon"}) == 1);
+  assert(fm.count(Probe{"eta"}) == 1);
+  assert(fm.count(Probe{"gamma"}) == 1);
+  assert(fm.count(Probe{"al"}) == 0); // a prefix of a stored key is not a match
+  assert(fm.count(Probe{""}) == 0);
+  assert(fm.count(Probe{"g"}) == 0);
+}
+
+int main(int, char**) {
+  test<std::vector<std::string>, std::vector<int>>();
+  test<std::deque<std::string>, std::vector<int>>();
+  test<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test<std::vector<std::string, min_allocator<std::string>>, std::vector<int, min_allocator<int>>>();
+
+  { // the flag handed to the comparator must be set once count() sees a Transparent<int> key
+    bool used_transparent = false;
+    TransparentComparator cmp(used_transparent);
+    std::flat_map<int, int, TransparentComparator> fm(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, cmp);
+    assert(!used_transparent);
+    auto hits = fm.count(Transparent<int>{3});
+    assert(hits == 1);
+    assert(used_transparent);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range.pass.cpp
new file mode 100644
index 000000000000000..8fa73d2a2eb51df
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range.pass.cpp
@@ -0,0 +1,78 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// pair<iterator,iterator>             equal_range(const key_type& k);
+// pair<const_iterator,const_iterator> equal_range(const key_type& k) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using K = typename KeyContainer::value_type;
+  using V = typename ValueContainer::value_type;
+  { // std::less<> ordering: keys sit at offsets 0..4 as 1, 2, 4, 5, 8
+    using Map        = std::flat_map<K, V, std::less<>, KeyContainer, ValueContainer>;
+    using Range      = std::pair<typename Map::iterator, typename Map::iterator>;
+    using ConstRange = std::pair<typename Map::const_iterator, typename Map::const_iterator>;
+    Map fm           = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}};
+    ASSERT_SAME_TYPE(decltype(fm.equal_range(0)), Range);
+    ASSERT_SAME_TYPE(decltype(std::as_const(fm).equal_range(0)), ConstRange);
+    auto first = fm.begin();
+    assert(fm.equal_range(0) == std::pair(first, first));
+    assert(fm.equal_range(1) == std::pair(first, first + 1));
+    assert(fm.equal_range(2) == std::pair(first + 1, first + 2));
+    assert(fm.equal_range(3) == std::pair(first + 2, first + 2));
+    assert(fm.equal_range(4) == std::pair(first + 2, first + 3));
+    assert(fm.equal_range(5) == std::pair(first + 3, first + 4));
+    assert(fm.equal_range(6) == std::pair(first + 4, first + 4));
+    assert(fm.equal_range(7) == std::pair(first + 4, first + 4));
+    assert(std::as_const(fm).equal_range(8) == std::pair(fm.cbegin() + 4, fm.cbegin() + 5));
+    assert(std::as_const(fm).equal_range(9) == std::pair(fm.cbegin() + 5, fm.cbegin() + 5));
+  }
+
+  { // std::greater<int> ordering: keys sit at offsets 0..4 as 8, 5, 4, 2, 1
+    using Map        = std::flat_map<K, V, std::greater<int>, KeyContainer, ValueContainer>;
+    using Range      = std::pair<typename Map::iterator, typename Map::iterator>;
+    using ConstRange = std::pair<typename Map::const_iterator, typename Map::const_iterator>;
+    Map fm           = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}};
+    ASSERT_SAME_TYPE(decltype(fm.equal_range(0)), Range);
+    ASSERT_SAME_TYPE(decltype(std::as_const(fm).equal_range(0)), ConstRange);
+    auto first = fm.begin();
+    assert(fm.equal_range(0) == std::pair(first + 5, first + 5));
+    assert(fm.equal_range(1) == std::pair(first + 4, first + 5));
+    assert(fm.equal_range(2) == std::pair(first + 3, first + 4));
+    assert(fm.equal_range(3) == std::pair(first + 3, first + 3));
+    assert(fm.equal_range(4) == std::pair(first + 2, first + 3));
+    assert(fm.equal_range(5) == std::pair(first + 1, first + 2));
+    assert(fm.equal_range(6) == std::pair(first + 1, first + 1));
+    assert(fm.equal_range(7) == std::pair(first + 1, first + 1));
+    assert(std::as_const(fm).equal_range(8) == std::pair(fm.cbegin(), fm.cbegin() + 1));
+    assert(std::as_const(fm).equal_range(9) == std::pair(fm.cbegin(), fm.cbegin()));
+  }
+}
+
+int main(int, char**) { // runs test() once per key/value container pairing listed below
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>(); // key and mapped containers of different kinds
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>();
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range_transparent.pass.cpp
new file mode 100644
index 000000000000000..0198f433bdc4f1d
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/equal_range_transparent.pass.cpp
@@ -0,0 +1,100 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class K> pair<iterator,iterator>             equal_range(const K& x);
+// template<class K> pair<const_iterator,const_iterator> equal_range(const K& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <string>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+template <class M> // Satisfied when m.equal_range(k) is well-formed for a Transparent<int> key k.
+concept CanEqualRange   = requires(M m, Transparent<int> k) { m.equal_range(k); };
+using TransparentMap    = std::flat_map<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_map<int, double, NonTransparentComparator>;
+static_assert(CanEqualRange<TransparentMap>); // comparator declares is_transparent: overload available
+static_assert(CanEqualRange<const TransparentMap>);
+static_assert(!CanEqualRange<NonTransparentMap>); // no is_transparent member: overload must be absent
+static_assert(!CanEqualRange<const NonTransparentMap>);
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Exercises heterogeneous equal_range() on const and non-const maps, for present and absent keys.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  using R        = std::pair<typename M::iterator, typename M::iterator>;
+  using CR       = std::pair<typename M::const_iterator, typename M::const_iterator>;
+  M m            = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}};
+  const auto& cm = m;
+  ASSERT_SAME_TYPE(decltype(m.equal_range(Transparent<std::string>{"abc"})), R);
+  ASSERT_SAME_TYPE(decltype(std::as_const(m).equal_range(Transparent<std::string>{"b"})), CR);
+
+  auto test_found = [&](auto&& map, const std::string& expected_key, int expected_value) {
+    auto [first, last] = map.equal_range(Transparent<std::string>{expected_key});
+    assert(last - first == 1); // a hit spans exactly one element
+    auto [key, value] = *first;
+    assert(key == expected_key);
+    assert(value == expected_value);
+  };
+
+  auto test_not_found = [&](auto&& map, const std::string& expected_key, long expected_offset) {
+    auto [first, last] = map.equal_range(Transparent<std::string>{expected_key});
+    assert(first == last); // a miss yields an empty range
+    assert(first - map.begin() == expected_offset); // measured on the queried map so iterator constness matches
+  };
+
+  test_found(m, "alpha", 1);
+  test_found(m, "beta", 2);
+  test_found(m, "epsilon", 3);
+  test_found(m, "eta", 4);
+  test_found(m, "gamma", 5);
+  test_found(cm, "alpha", 1); // same lookups through the const overload
+  test_found(cm, "beta", 2);
+  test_found(cm, "epsilon", 3);
+  test_found(cm, "eta", 4);
+  test_found(cm, "gamma", 5);
+
+  test_not_found(m, "charlie", 2); // falls between "beta" and "epsilon"
+  test_not_found(m, "aaa", 0);     // before every key
+  test_not_found(m, "zzz", 5);     // after every key
+  test_not_found(cm, "charlie", 2);
+  test_not_found(cm, "aaa", 0);
+  test_not_found(cm, "zzz", 5);
+}
+
+int main(int, char**) { // Runs test() per container pairing, then checks the transparent comparator is really used.
+  test<std::vector<std::string>, std::vector<int>>();
+  test<std::deque<std::string>, std::vector<int>>();
+  test<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test<std::vector<std::string, min_allocator<std::string>>, std::vector<int, min_allocator<int>>>();
+
+  {
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used); // sets the flag whenever one of its Transparent<T> overloads runs
+    std::flat_map<int, int, TransparentComparator> m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c);
+    assert(!transparent_used); // flag must still be clear after construction
+    auto p = m.equal_range(Transparent<int>{3});
+    assert(p.first != p.second); // key 3 is present, so the range is non-empty
+    assert(transparent_used);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find.pass.cpp
new file mode 100644
index 000000000000000..9fae407c7d8f7c5
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find.pass.cpp
@@ -0,0 +1,55 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+//       iterator find(const key_type& k);
+// const_iterator find(const key_type& k) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <string>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks find(const key_type&): a hit returns the element's iterator, a miss returns end().
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+
+  M m = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}};
+  ASSERT_SAME_TYPE(decltype(m.find(0)), typename M::iterator);
+  ASSERT_SAME_TYPE(decltype(std::as_const(m).find(0)), typename M::const_iterator);
+  assert(m.find(0) == m.end()); // below the smallest key
+  assert(m.find(1) == m.begin());
+  assert(m.find(2) == m.begin() + 1);
+  assert(m.find(3) == m.end()); // gap between stored keys
+  assert(m.find(4) == m.begin() + 2);
+  assert(m.find(5) == m.begin() + 3);
+  assert(m.find(6) == m.end());
+  assert(m.find(7) == m.end());
+  assert(std::as_const(m).find(8) == m.begin() + 4); // const overload
+  assert(std::as_const(m).find(9) == m.end());       // above the largest key
+}
+
+int main(int, char**) { // Instantiates test() once for each key/value container pairing below.
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>(); // key and mapped containers need not be the same template
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>(); // non-default allocator
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find_transparent.pass.cpp
new file mode 100644
index 000000000000000..291577a89fc8f4d
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/find_transparent.pass.cpp
@@ -0,0 +1,88 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class K> iterator       find(const K& x);
+// template<class K> const_iterator find(const K& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <string>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+template <class M> // Satisfied when m.find(k) is well-formed for a Transparent<int> key k.
+concept CanFind         = requires(M m, Transparent<int> k) { m.find(k); };
+using TransparentMap    = std::flat_map<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_map<int, double, NonTransparentComparator>;
+static_assert(CanFind<TransparentMap>); // comparator declares is_transparent: overload available
+static_assert(CanFind<const TransparentMap>);
+static_assert(!CanFind<NonTransparentMap>); // no is_transparent member: overload must be absent
+static_assert(!CanFind<const NonTransparentMap>);
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks heterogeneous find() on const and non-const maps; a miss shows up as offset 5 (== size()).
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  M m            = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}};
+  const auto& cm = m;
+  ASSERT_SAME_TYPE(decltype(m.find(Transparent<std::string>{"abc"})), typename M::iterator);
+  ASSERT_SAME_TYPE(decltype(std::as_const(m).find(Transparent<std::string>{"b"})), typename M::const_iterator);
+
+  auto test_find = [&](auto&& map, const std::string& expected_key, long expected_offset) {
+    auto iter = map.find(Transparent<std::string>{expected_key});
+    assert(iter - map.begin() == expected_offset); // distance from begin() identifies the element (or end())
+  };
+
+  test_find(m, "alpha", 0);
+  test_find(m, "beta", 1);
+  test_find(m, "epsilon", 2);
+  test_find(m, "eta", 3);
+  test_find(m, "gamma", 4);
+  test_find(m, "charlie", 5); // absent keys: result is end()
+  test_find(m, "aaa", 5);
+  test_find(m, "zzz", 5);
+  test_find(cm, "alpha", 0); // same lookups through the const overload
+  test_find(cm, "beta", 1);
+  test_find(cm, "epsilon", 2);
+  test_find(cm, "eta", 3);
+  test_find(cm, "gamma", 4);
+  test_find(cm, "charlie", 5);
+  test_find(cm, "aaa", 5);
+  test_find(cm, "zzz", 5);
+}
+
+int main(int, char**) { // Runs test() per container pairing, then checks the transparent comparator is really used.
+  test<std::vector<std::string>, std::vector<int>>();
+  test<std::deque<std::string>, std::vector<int>>();
+  test<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test<std::vector<std::string, min_allocator<std::string>>, std::vector<int, min_allocator<int>>>();
+
+  {
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used); // sets the flag whenever one of its Transparent<T> overloads runs
+    std::flat_map<int, int, TransparentComparator> m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c);
+    assert(!transparent_used); // flag must still be clear after construction
+    auto it = m.find(Transparent<int>{3});
+    assert(it != m.end()); // key 3 is present
+    assert(transparent_used);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound.pass.cpp
new file mode 100644
index 000000000000000..b5491f3b226746c
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound.pass.cpp
@@ -0,0 +1,71 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+//       iterator lower_bound(const key_type& k);
+// const_iterator lower_bound(const key_type& k) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks lower_bound(const key_type&) with std::less and std::greater orderings.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  {
+    using M = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+    M m     = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}};
+    ASSERT_SAME_TYPE(decltype(m.lower_bound(0)), typename M::iterator);
+    ASSERT_SAME_TYPE(decltype(std::as_const(m).lower_bound(0)), typename M::const_iterator);
+    assert(m.lower_bound(0) == m.begin());
+    assert(m.lower_bound(1) == m.begin());
+    assert(m.lower_bound(2) == m.begin() + 1);
+    assert(m.lower_bound(3) == m.begin() + 2); // absent key: lands on the next stored key (4)
+    assert(m.lower_bound(4) == m.begin() + 2);
+    assert(m.lower_bound(5) == m.begin() + 3);
+    assert(m.lower_bound(6) == m.begin() + 4);
+    assert(m.lower_bound(7) == m.begin() + 4);
+    assert(std::as_const(m).lower_bound(8) == m.begin() + 4); // const overload
+    assert(std::as_const(m).lower_bound(9) == m.end());       // past the largest key
+  }
+  {
+    using M = std::flat_map<Key, Value, std::greater<Key>, KeyContainer, ValueContainer>;
+    M m     = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}}; // iteration order is 8, 5, 4, 2, 1
+    ASSERT_SAME_TYPE(decltype(m.lower_bound(0)), typename M::iterator);
+    ASSERT_SAME_TYPE(decltype(std::as_const(m).lower_bound(0)), typename M::const_iterator);
+    assert(m.lower_bound(0) == m.end());
+    assert(m.lower_bound(1) == m.begin() + 4);
+    assert(m.lower_bound(2) == m.begin() + 3);
+    assert(m.lower_bound(3) == m.begin() + 3); // absent key: lands on the next key in descending order (2)
+    assert(m.lower_bound(4) == m.begin() + 2);
+    assert(m.lower_bound(5) == m.begin() + 1);
+    assert(m.lower_bound(6) == m.begin() + 1);
+    assert(m.lower_bound(7) == m.begin() + 1);
+    assert(std::as_const(m).lower_bound(8) == m.begin());
+    assert(std::as_const(m).lower_bound(9) == m.begin());
+  }
+}
+
+int main(int, char**) { // Instantiates test() once for each key/value container pairing below.
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>(); // key and mapped containers need not be the same template
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>(); // non-default allocator
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound_transparent.pass.cpp
new file mode 100644
index 000000000000000..6a923c197e91eaa
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/lower_bound_transparent.pass.cpp
@@ -0,0 +1,95 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class K> iterator       lower_bound(const K& x);
+// template<class K> const_iterator lower_bound(const K& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <string>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+template <class M> // Satisfied when m.lower_bound(k) is well-formed for a Transparent<int> key k.
+concept CanLowerBound   = requires(M m, Transparent<int> k) { m.lower_bound(k); };
+using TransparentMap    = std::flat_map<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_map<int, double, NonTransparentComparator>;
+static_assert(CanLowerBound<TransparentMap>); // comparator declares is_transparent: overload available
+static_assert(CanLowerBound<const TransparentMap>);
+static_assert(!CanLowerBound<NonTransparentMap>); // no is_transparent member: overload must be absent
+static_assert(!CanLowerBound<const NonTransparentMap>);
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks heterogeneous lower_bound() on const and non-const maps by comparing offsets from begin().
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  M m            = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}};
+  const auto& cm = m;
+  ASSERT_SAME_TYPE(decltype(m.lower_bound(Transparent<std::string>{"abc"})), typename M::iterator);
+  ASSERT_SAME_TYPE(decltype(std::as_const(m).lower_bound(Transparent<std::string>{"b"})), typename M::const_iterator);
+
+  auto test_lower_bound = [&](auto&& map, const std::string& expected_key, long expected_offset) {
+    auto iter = map.lower_bound(Transparent<std::string>{expected_key});
+    assert(iter - map.begin() == expected_offset);
+  };
+
+  test_lower_bound(m, "abc", 0); // before every key
+  test_lower_bound(m, "alpha", 0);
+  test_lower_bound(m, "beta", 1); // present key: lower_bound points at it
+  test_lower_bound(m, "bets", 2); // just after "beta"
+  test_lower_bound(m, "charlie", 2);
+  test_lower_bound(m, "echo", 2);
+  test_lower_bound(m, "epsilon", 2);
+  test_lower_bound(m, "eta", 3);
+  test_lower_bound(m, "gamma", 4);
+  test_lower_bound(m, "golf", 5); // after every key: end()
+  test_lower_bound(m, "zzz", 5);
+
+  test_lower_bound(cm, "abc", 0); // same lookups through the const overload
+  test_lower_bound(cm, "alpha", 0);
+  test_lower_bound(cm, "beta", 1);
+  test_lower_bound(cm, "bets", 2);
+  test_lower_bound(cm, "charlie", 2);
+  test_lower_bound(cm, "echo", 2);
+  test_lower_bound(cm, "epsilon", 2);
+  test_lower_bound(cm, "eta", 3);
+  test_lower_bound(cm, "gamma", 4);
+  test_lower_bound(cm, "golf", 5);
+  test_lower_bound(cm, "zzz", 5);
+}
+
+int main(int, char**) { // Runs test() per container pairing, then checks the transparent comparator is really used.
+  test<std::vector<std::string>, std::vector<int>>();
+  test<std::deque<std::string>, std::vector<int>>();
+  test<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test<std::vector<std::string, min_allocator<std::string>>, std::vector<int, min_allocator<int>>>();
+
+  {
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used); // sets the flag whenever one of its Transparent<T> overloads runs
+    std::flat_map<int, int, TransparentComparator> m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c);
+    assert(!transparent_used); // flag must still be clear after construction
+    auto it = m.lower_bound(Transparent<int>{3});
+    assert(it != m.end()); // key 3 is present, so the bound is a real element
+    assert(transparent_used);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound.pass.cpp
new file mode 100644
index 000000000000000..775e53286d6295d
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound.pass.cpp
@@ -0,0 +1,72 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+//       iterator upper_bound(const key_type& k);
+// const_iterator upper_bound(const key_type& k) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks upper_bound(const key_type&) with std::less and std::greater orderings.
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  {
+    using M = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+    M m     = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}};
+    ASSERT_SAME_TYPE(decltype(m.upper_bound(0)), typename M::iterator);
+    ASSERT_SAME_TYPE(decltype(std::as_const(m).upper_bound(0)), typename M::const_iterator);
+    assert(m.upper_bound(0) == m.begin());
+    assert(m.upper_bound(1) == m.begin() + 1); // present key: one past the matching element
+    assert(m.upper_bound(2) == m.begin() + 2);
+    assert(m.upper_bound(3) == m.begin() + 2); // absent key: lands on the next stored key (4)
+    assert(m.upper_bound(4) == m.begin() + 3);
+    assert(m.upper_bound(5) == m.begin() + 4);
+    assert(m.upper_bound(6) == m.begin() + 4);
+    assert(std::as_const(m).upper_bound(7) == m.begin() + 4); // const overload
+    assert(std::as_const(m).upper_bound(8) == m.end());
+    assert(std::as_const(m).upper_bound(9) == m.end());
+  }
+
+  {
+    using M = std::flat_map<Key, Value, std::greater<Key>, KeyContainer, ValueContainer>;
+    M m     = {{1, 'a'}, {2, 'b'}, {4, 'd'}, {5, 'e'}, {8, 'h'}}; // iteration order is 8, 5, 4, 2, 1
+    ASSERT_SAME_TYPE(decltype(m.upper_bound(0)), typename M::iterator);
+    ASSERT_SAME_TYPE(decltype(std::as_const(m).upper_bound(0)), typename M::const_iterator);
+    assert(m.upper_bound(0) == m.end());
+    assert(m.upper_bound(1) == m.end()); // 1 is the last element in descending order
+    assert(m.upper_bound(2) == m.begin() + 4);
+    assert(m.upper_bound(3) == m.begin() + 3);
+    assert(m.upper_bound(4) == m.begin() + 3);
+    assert(m.upper_bound(5) == m.begin() + 2);
+    assert(m.upper_bound(6) == m.begin() + 1);
+    assert(m.upper_bound(7) == m.begin() + 1);
+    assert(std::as_const(m).upper_bound(8) == m.begin() + 1);
+    assert(std::as_const(m).upper_bound(9) == m.begin());
+  }
+}
+
+int main(int, char**) { // Instantiates test() once for each key/value container pairing below.
+  test<std::vector<int>, std::vector<char>>();
+  test<std::deque<int>, std::vector<char>>(); // key and mapped containers need not be the same template
+  test<MinSequenceContainer<int>, MinSequenceContainer<char>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<char, min_allocator<char>>>(); // non-default allocator
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound_transparent.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound_transparent.pass.cpp
new file mode 100644
index 000000000000000..4e83f920835dffc
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/flat.map.operations/upper_bound_transparent.pass.cpp
@@ -0,0 +1,94 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// template<class K> iterator       upper_bound(const K& x);
+// template<class K> const_iterator upper_bound(const K& x) const;
+
+#include <cassert>
+#include <deque>
+#include <flat_map>
+#include <string>
+#include <utility>
+
+#include "MinSequenceContainer.h"
+#include "../helpers.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+
+// Constraints: The qualified-id Compare::is_transparent is valid and denotes a type.
+template <class M> // Satisfied when m.upper_bound(k) is well-formed for a Transparent<int> key k.
+concept CanUpperBound   = requires(M m, Transparent<int> k) { m.upper_bound(k); };
+using TransparentMap    = std::flat_map<int, double, TransparentComparator>;
+using NonTransparentMap = std::flat_map<int, double, NonTransparentComparator>;
+static_assert(CanUpperBound<TransparentMap>); // comparator declares is_transparent: overload available
+static_assert(CanUpperBound<const TransparentMap>);
+static_assert(!CanUpperBound<NonTransparentMap>); // no is_transparent member: overload must be absent
+static_assert(!CanUpperBound<const NonTransparentMap>);
+
+template <class KeyContainer, class ValueContainer>
+void test() { // Checks heterogeneous upper_bound() on const and non-const maps by comparing offsets from begin().
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  using M     = std::flat_map<Key, Value, TransparentComparator, KeyContainer, ValueContainer>;
+
+  M m            = {{"alpha", 1}, {"beta", 2}, {"epsilon", 3}, {"eta", 4}, {"gamma", 5}};
+  const auto& cm = m;
+  ASSERT_SAME_TYPE(decltype(m.upper_bound(Transparent<std::string>{"abc"})), typename M::iterator);
+  ASSERT_SAME_TYPE(decltype(std::as_const(m).upper_bound(Transparent<std::string>{"b"})), typename M::const_iterator);
+
+  auto test_upper_bound = [&](auto&& map, const std::string& expected_key, long expected_offset) {
+    auto iter = map.upper_bound(Transparent<std::string>{expected_key});
+    assert(iter - map.begin() == expected_offset);
+  };
+
+  test_upper_bound(m, "abc", 0);   // before every key
+  test_upper_bound(m, "alpha", 1); // present key: one past the matching element
+  test_upper_bound(m, "beta", 2);
+  test_upper_bound(m, "bets", 2); // absent key: same position as lower_bound would give
+  test_upper_bound(m, "charlie", 2);
+  test_upper_bound(m, "echo", 2);
+  test_upper_bound(m, "epsilon", 3);
+  test_upper_bound(m, "eta", 4);
+  test_upper_bound(m, "gamma", 5); // last key: result is end()
+  test_upper_bound(m, "golf", 5);
+  test_upper_bound(m, "zzz", 5);
+
+  test_upper_bound(cm, "abc", 0); // same lookups through the const overload
+  test_upper_bound(cm, "alpha", 1);
+  test_upper_bound(cm, "beta", 2);
+  test_upper_bound(cm, "bets", 2);
+  test_upper_bound(cm, "charlie", 2);
+  test_upper_bound(cm, "echo", 2);
+  test_upper_bound(cm, "epsilon", 3);
+  test_upper_bound(cm, "eta", 4);
+  test_upper_bound(cm, "gamma", 5);
+  test_upper_bound(cm, "golf", 5);
+  test_upper_bound(cm, "zzz", 5);
+}
+
+int main(int, char**) { // Runs test() per container pairing, then checks the transparent comparator is really used.
+  test<std::vector<std::string>, std::vector<int>>();
+  test<std::deque<std::string>, std::vector<int>>();
+  test<MinSequenceContainer<std::string>, MinSequenceContainer<int>>();
+  test<std::vector<std::string, min_allocator<std::string>>, std::vector<int, min_allocator<int>>>();
+  {
+    bool transparent_used = false;
+    TransparentComparator c(transparent_used); // sets the flag whenever one of its Transparent<T> overloads runs
+    std::flat_map<int, int, TransparentComparator> m(std::sorted_unique, {{1, 1}, {2, 2}, {3, 3}}, c);
+    assert(!transparent_used); // flag must still be clear after construction
+    auto it = m.upper_bound(Transparent<int>{2});
+    assert(it != m.end()); // key 3 follows 2, so the bound is a real element
+    assert(transparent_used);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/helpers.h b/libcxx/test/std/containers/container.adaptors/flat.map/helpers.h
new file mode 100644
index 000000000000000..8dbb85a6c0acf13
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/helpers.h
@@ -0,0 +1,394 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUPPORT_FLAT_MAP_HELPERS_H
+#define SUPPORT_FLAT_MAP_HELPERS_H
+
+#include <algorithm>
+#include <cassert>
+#include <string>
+#include <vector>
+#include <flat_map>
+
+#include "test_allocator.h"
+#include "test_macros.h"
+
+template <class... Args>
+void check_invariant(const std::flat_map<Args...>& m) {
+  // A well-formed flat_map has parallel key/value containers and keys strictly ordered by key_comp().
+  const auto& sorted_keys = m.keys();
+  assert(sorted_keys.size() == m.values().size());
+  const auto first = sorted_keys.begin();
+  const auto last  = sorted_keys.end();
+  assert(std::is_sorted(first, last, m.key_comp()));
+  auto equiv = [cmp = m.key_comp()](const auto& a, const auto& b) { return !(cmp(a, b) || cmp(b, a)); };
+  assert(std::adjacent_find(first, last, equiv) == last);
+}
+
+struct StartsWith { // Lookup key that Less treats as equivalent to every string beginning with `ch`.
+  explicit StartsWith(char ch) : lower_(1, ch), upper_(1, ch + 1) {}
+  StartsWith(const StartsWith&)     = delete;
+  void operator=(const StartsWith&) = delete;
+  struct Less {
+    using is_transparent = void;
+    bool operator()(const std::string& lhs, const std::string& rhs) const { return lhs < rhs; }
+    bool operator()(const StartsWith& prefix, const std::string& str) const { return prefix.upper_ <= str; }
+    bool operator()(const std::string& str, const StartsWith& prefix) const { return str < prefix.lower_; }
+    bool operator()(const StartsWith&, const StartsWith&) const {
+      assert(false); // two prefix keys are never expected to be compared with each other
+      return false;
+    }
+  };
+
+private:
+  std::string lower_; // one-character string holding `ch`
+  std::string upper_; // one-character string holding `ch + 1`, used as the exclusive upper bound
+};
+
+template <class T>
+struct CopyOnlyVector : std::vector<T> { // std::vector whose "move" operations copy, leaving the source intact.
+  using std::vector<T>::vector;
+
+  CopyOnlyVector(const CopyOnlyVector&) = default;
+  CopyOnlyVector(CopyOnlyVector&& other) : CopyOnlyVector(other) {} // `other` is an lvalue here: delegates to copy
+  CopyOnlyVector(CopyOnlyVector&& other, std::vector<T>::allocator_type alloc) : CopyOnlyVector(other, alloc) {}
+  // Assignment mirrors the constructors: the rvalue overload forwards its named argument to copy assignment.
+  CopyOnlyVector& operator=(const CopyOnlyVector&) = default;
+  CopyOnlyVector& operator=(CopyOnlyVector&& other) { return this->operator=(other); }
+};
+
+template <class T, bool ConvertibleToT = false>
+struct Transparent { // Wrapper around a T, used by the flat_map tests as a heterogeneous lookup key.
+  T t;
+
+  operator T() const // implicit conversion back to T, available only when ConvertibleToT is true
+    requires ConvertibleToT
+  {
+    return t;
+  }
+};
+
+template <class T>
+using ConvertibleTransparent = Transparent<T, true>; // variant that converts implicitly to T
+
+template <class T>
+using NonConvertibleTransparent = Transparent<T, false>; // variant with no conversion to T
+
+struct TransparentComparator {
+  using is_transparent = void;
+
+  bool* transparent_used  = nullptr; // optional flag, set whenever a Transparent<T> overload is invoked
+  TransparentComparator() = default;
+  TransparentComparator(bool& used) : transparent_used(&used) {}
+
+  // Heterogeneous comparisons: a plain key against a wrapped value, in either argument order.
+  template <class T, bool Convertible>
+  bool operator()(const T& key, const Transparent<T, Convertible>& wrapped) const {
+    if (transparent_used)
+      *transparent_used = true;
+    return key < wrapped.t;
+  }
+
+  template <class T, bool Convertible>
+  bool operator()(const Transparent<T, Convertible>& wrapped, const T& key) const {
+    if (transparent_used)
+      *transparent_used = true;
+    return wrapped.t < key;
+  }
+
+  // Homogeneous comparison: leaves the flag untouched.
+  template <class T>
+  bool operator()(const T& lhs, const T& rhs) const {
+    return lhs < rhs;
+  }
+};
+
+struct NonTransparentComparator { // Same call signatures as TransparentComparator, but no is_transparent member.
+  template <class T, bool Convertible>
+  bool operator()(const T&, const Transparent<T, Convertible>&) const; // declarations only; no bodies given here
+
+  template <class T, bool Convertible>
+  bool operator()(const Transparent<T, Convertible>&, const T&) const;
+
+  template <class T>
+  bool operator()(const T&, const T&) const;
+};
+
+struct NoDefaultCtr { // Type whose default constructor is deleted.
+  NoDefaultCtr() = delete;
+};
+
+#ifndef TEST_HAS_NO_EXCEPTIONS
+template <class T>
+struct EmplaceUnsafeContainer : std::vector<T> {
+  using std::vector<T>::vector;
+
+  // emplace() never succeeds: it swaps the first two elements (when there are at least two)
+  // and then throws 42, so a failed call leaves the container modified.
+  template <class... Args>
+  auto emplace(Args&&... args) -> decltype(std::declval<std::vector<T>>().emplace(std::forward<Args>(args)...)) {
+    if (this->size() >= 2) {
+      auto first = this->begin();
+      std::iter_swap(first, first + 1); // messing up the container
+    }
+
+    throw 42;
+  }
+
+  // insert() misbehaves in exactly the same way as emplace(): scramble the first two
+  // elements, then throw 42.
+  template <class... Args>
+  auto insert(Args&&... args) -> decltype(std::declval<std::vector<T>>().insert(std::forward<Args>(args)...)) {
+    if (this->size() >= 2) {
+      auto first = this->begin();
+      std::iter_swap(first, first + 1); // messing up the container
+    }
+
+    throw 42;
+  }
+};
+
+template <class T>
+struct ThrowOnEraseContainer : std::vector<T> { // std::vector whose erase() always fails with `throw 42`.
+  using std::vector<T>::vector;
+
+  template <class... Args> // return type is that of std::vector<T>::erase called with the same arguments
+  auto erase(Args&&... args) -> decltype(std::declval<std::vector<T>>().erase(std::forward<Args>(args)...)) {
+    throw 42;
+  }
+};
+
+template <class T>
+struct ThrowOnMoveContainer : std::vector<T> { // std::vector whose move operations always fail with `throw 42`.
+  using std::vector<T>::vector;
+
+  ThrowOnMoveContainer(ThrowOnMoveContainer&&) { throw 42; } // move construction throws
+
+  ThrowOnMoveContainer& operator=(ThrowOnMoveContainer&&) { throw 42; } // move assignment throws
+};
+
+#endif
+
+// Calls emplace_function(m, 0, 0) while an underlying container throws, then checks the flat_map invariant.
+template <class F>
+void test_emplace_exception_guarantee([[maybe_unused]] F&& emplace_function) {
+#ifndef TEST_HAS_NO_EXCEPTIONS
+  using C = TransparentComparator;
+  {
+    // Throw on emplace the key, and underlying has strong exception guarantee
+    using KeyContainer = std::vector<int, test_allocator<int>>;
+    using M            = std::flat_map<int, int, C, KeyContainer>;
+
+    LIBCPP_STATIC_ASSERT(std::__container_traits<KeyContainer>::__emplacement_has_strong_exception_safety_guarantee);
+
+    test_allocator_statistics stats;
+
+    KeyContainer a({1, 2, 3, 4}, test_allocator<int>{&stats});
+    std::vector<int> b                    = {5, 6, 7, 8};
+    [[maybe_unused]] auto expected_keys   = a;
+    [[maybe_unused]] auto expected_values = b;
+    M m(std::sorted_unique, std::move(a), std::move(b));
+
+    stats.throw_after = 1;
+    try {
+      emplace_function(m, 0, 0);
+      assert(false);
+    } catch (const std::bad_alloc&) {
+      check_invariant(m);
+      // In libc++, the flat_map is unchanged
+      LIBCPP_ASSERT(m.size() == 4);
+      LIBCPP_ASSERT(m.keys() == expected_keys);
+      LIBCPP_ASSERT(m.values() == expected_values);
+    }
+  }
+  {
+    // Throw on emplace the key, and underlying has no strong exception guarantee
+    using KeyContainer = EmplaceUnsafeContainer<int>;
+    using M            = std::flat_map<int, int, C, KeyContainer>;
+
+    LIBCPP_STATIC_ASSERT(!std::__container_traits<KeyContainer>::__emplacement_has_strong_exception_safety_guarantee);
+    KeyContainer a     = {1, 2, 3, 4};
+    std::vector<int> b = {5, 6, 7, 8};
+    M m(std::sorted_unique, std::move(a), std::move(b));
+    try {
+      emplace_function(m, 0, 0);
+      assert(false);
+    } catch (int) {
+      check_invariant(m);
+      // In libc++, the flat_map is cleared
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+  }
+  {
+    // Throw on emplace the value, and underlying has strong exception guarantee
+    using ValueContainer = std::vector<int, test_allocator<int>>;
+    using M              = std::flat_map<int, int, C, std::vector<int>, ValueContainer>;
+
+    LIBCPP_STATIC_ASSERT(std::__container_traits<ValueContainer>::__emplacement_has_strong_exception_safety_guarantee);
+
+    std::vector<int> a = {1, 2, 3, 4};
+    test_allocator_statistics stats;
+    ValueContainer b({1, 2, 3, 4}, test_allocator<int>{&stats});
+
+    [[maybe_unused]] auto expected_keys   = a;
+    [[maybe_unused]] auto expected_values = b;
+    M m(std::sorted_unique, std::move(a), std::move(b));
+
+    stats.throw_after = 1;
+    try {
+      emplace_function(m, 0, 0);
+      assert(false);
+    } catch (const std::bad_alloc&) {
+      check_invariant(m);
+      // In libc++, the emplaced key is erased and the flat_map is unchanged
+      LIBCPP_ASSERT(m.size() == 4);
+      LIBCPP_ASSERT(m.keys() == expected_keys);
+      LIBCPP_ASSERT(m.values() == expected_values);
+    }
+  }
+  {
+    // Throw on emplace the value, and underlying has no strong exception guarantee
+    using ValueContainer = EmplaceUnsafeContainer<int>;
+    using M              = std::flat_map<int, int, C, std::vector<int>, ValueContainer>;
+
+    LIBCPP_STATIC_ASSERT(!std::__container_traits<ValueContainer>::__emplacement_has_strong_exception_safety_guarantee);
+    std::vector<int> a = {1, 2, 3, 4};
+    ValueContainer b   = {1, 2, 3, 4};
+
+    M m(std::sorted_unique, std::move(a), std::move(b));
+
+    try {
+      emplace_function(m, 0, 0);
+      assert(false);
+    } catch (int) {
+      check_invariant(m);
+      // In libc++, the flat_map is cleared
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+  }
+  {
+    // Throw on emplace the value, then throw again on erasing the key
+    using KeyContainer   = ThrowOnEraseContainer<int>;
+    using ValueContainer = std::vector<int, test_allocator<int>>;
+    using M              = std::flat_map<int, int, C, KeyContainer, ValueContainer>;
+
+    LIBCPP_STATIC_ASSERT(std::__container_traits<ValueContainer>::__emplacement_has_strong_exception_safety_guarantee);
+
+    KeyContainer a = {1, 2, 3, 4};
+    test_allocator_statistics stats;
+    ValueContainer b({1, 2, 3, 4}, test_allocator<int>{&stats});
+
+    M m(std::sorted_unique, std::move(a), std::move(b));
+    stats.throw_after = 1;
+    try {
+      emplace_function(m, 0, 0);
+      assert(false);
+    } catch (const std::bad_alloc&) {
+      check_invariant(m);
+      // In libc++, we try to erase the key after value emplacement failure.
+      // and after erasure failure, we clear the flat_map
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+  }
+#endif
+}
+
+// Calls insert_function(m, newValues) on a flat_map whose key container is EmplaceUnsafeContainer and expects an
+// int to be thrown; afterwards the flat_map invariant must still hold.
+template <class F>
+void test_insert_range_exception_guarantee([[maybe_unused]] F&& insert_function) {
+#ifndef TEST_HAS_NO_EXCEPTIONS
+  using KeyContainer   = EmplaceUnsafeContainer<int>;
+  using ValueContainer = std::vector<int>;
+  using M              = std::flat_map<int, int, std::ranges::less, KeyContainer, ValueContainer>;
+  KeyContainer a{1, 2, 3, 4};
+  ValueContainer b{1, 2, 3, 4};
+  M m(std::sorted_unique, std::move(a), std::move(b));
+
+  std::vector<std::pair<int, int>> newValues = {{0, 0}, {1, 1}, {5, 5}, {6, 6}, {7, 7}, {8, 8}};
+  try {
+    insert_function(m, newValues);
+    assert(false);
+  } catch (int) {
+    check_invariant(m);
+    // In libc++, we clear if anything goes wrong when inserting a range
+    LIBCPP_ASSERT(m.size() == 0);
+  }
+#endif
+}
+
+template <class F>
+void test_erase_exception_guarantee([[maybe_unused]] F&& erase_function) {
+#ifndef TEST_HAS_NO_EXCEPTIONS
+  {
+    // key erase throws
+    using KeyContainer   = ThrowOnEraseContainer<int>;
+    using ValueContainer = std::vector<int>;
+    using M              = std::flat_map<int, int, TransparentComparator, KeyContainer, ValueContainer>;
+
+    KeyContainer a{1, 2, 3, 4};
+    ValueContainer b{1, 2, 3, 4};
+    M m(std::sorted_unique, std::move(a), std::move(b));
+    try {
+      erase_function(m, 3);
+      assert(false);
+    } catch (int) {
+      check_invariant(m);
+      // In libc++, we clear if anything goes wrong when erasing
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+  }
+  {
+    // value erase throws
+    using KeyContainer   = std::vector<int>;
+    using ValueContainer = ThrowOnEraseContainer<int>;
+    using M              = std::flat_map<int, int, TransparentComparator, KeyContainer, ValueContainer>;
+
+    KeyContainer a{1, 2, 3, 4};
+    ValueContainer b{1, 2, 3, 4};
+    M m(std::sorted_unique, std::move(a), std::move(b));
+    try {
+      erase_function(m, 3);
+      assert(false);
+    } catch (int) {
+      check_invariant(m);
+      // In libc++, we clear if anything goes wrong when erasing
+      LIBCPP_ASSERT(m.size() == 0);
+    }
+  }
+#endif
+}
+class Moveable { // move-only test type; a moved-from object has both members set to -1
+  int int_;
+  double double_;
+
+public:
+  Moveable() : int_(0), double_(0) {}
+  Moveable(int i, double d) : int_(i), double_(d) {}
+  Moveable(Moveable&& x) : int_(x.int_), double_(x.double_) {
+    x.int_    = -1; // -1 is the marker that moved() tests for
+    x.double_ = -1;
+  }
+  Moveable& operator=(Moveable&& x) {
+    int_      = x.int_;
+    x.int_    = -1; // mark the source as moved-from, as in the move constructor
+    double_   = x.double_;
+    x.double_ = -1;
+    return *this;
+  }
+
+  Moveable(const Moveable&)            = delete;
+  Moveable& operator=(const Moveable&) = delete;
+  bool operator==(const Moveable& x) const { return int_ == x.int_ && double_ == x.double_; }
+  bool operator<(const Moveable& x) const { return int_ < x.int_ || (int_ == x.int_ && double_ < x.double_); }
+
+  int get() const { return int_; }
+  bool moved() const { return int_ == -1; } // true when int_ holds the moved-from marker
+};
+
+#endif // SUPPORT_FLAT_MAP_HELPERS_H
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/incomplete_type.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/incomplete_type.pass.cpp
new file mode 100644
index 000000000000000..81c590ba73a1579
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/incomplete_type.pass.cpp
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// Check that std::flat_map and its iterators can be instantiated with an incomplete
+// type.
+
+#include <flat_map>
+
+struct A {
+  using Map = std::flat_map<A, A>; // A is still an incomplete type at this point
+  int data;
+  Map m;
+  Map::iterator it;
+  Map::const_iterator cit;
+};
+
+// Implement the operator< required in order to instantiate flat_map<A, X>; it orders A by its data member
+bool operator<(A const& L, A const& R) { return L.data < R.data; }
+
+int main(int, char**) {
+  A a; // default-initializes the flat_map and iterator members declared inside A
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/op_compare.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/op_compare.pass.cpp
new file mode 100644
index 000000000000000..fffe71158070407
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/op_compare.pass.cpp
@@ -0,0 +1,118 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+// <flat_map>
+
+// friend bool operator==(const flat_map& x, const flat_map& y);
+// friend synth-three-way-result<value_type>
+//   operator<=>(const flat_map& x, const flat_map& y);
+
+#include <algorithm>
+#include <cassert>
+#include <deque>
+#include <compare>
+#include <flat_map>
+#include <functional>
+#include <limits>
+#include <vector>
+
+#include "MinSequenceContainer.h"
+#include "test_macros.h"
+#include "min_allocator.h"
+#include "test_allocator.h"
+#include "test_comparisons.h"
+#include "test_container_comparisons.h"
+
+// Checks operator== and operator<=> of a flat_map that stores its keys and values in the given container types.
+template <class KeyContainer, class ValueContainer>
+void test() {
+  using Key   = typename KeyContainer::value_type;
+  using Value = typename ValueContainer::value_type;
+  {
+    using C = std::flat_map<Key, Value, std::less<Key>, KeyContainer, ValueContainer>;
+    C s1    = {{1, 1}};
+    C s2    = {{2, 0}}; // {{1,1}} versus {{2,0}}
+    ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::strong_ordering);
+    AssertComparisonsReturnBool<C>();
+    assert(testComparisons(s1, s2, false, true));
+    s2 = {{1, 1}}; // {{1,1}} versus {{1,1}}
+    assert(testComparisons(s1, s2, true, false));
+    s2 = {{1, 1}, {2, 0}}; // {{1,1}} versus {{1,1},{2,0}}
+    assert(testComparisons(s1, s2, false, true));
+    s1 = {{0, 0}, {1, 1}, {2, 2}}; // {{0,0},{1,1},{2,2}} versus {{1,1},{2,0}}
+    assert(testComparisons(s1, s2, false, true));
+    s2 = {{0, 0}, {1, 1}, {2, 3}}; // {{0,0},{1,1},{2,2}} versus {{0,0},{1,1},{2,3}}
+    assert(testComparisons(s1, s2, false, true));
+  }
+  {
+    // Comparisons use value_type's native operators, not the comparator
+    using C = std::flat_map<Key, Value, std::greater<Key>, KeyContainer, ValueContainer>;
+    C s1    = {{1, 1}};
+    C s2    = {{2, 0}}; // {{1,1}} versus {{2,0}}
+    ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::strong_ordering);
+    AssertComparisonsReturnBool<C>();
+    assert(testComparisons(s1, s2, false, true));
+    s2 = {{1, 1}}; // {{1,1}} versus {{1,1}}
+    assert(testComparisons(s1, s2, true, false));
+    s2 = {{1, 1}, {2, 0}}; // {{1,1}} versus {{2,0},{1,1}}
+    assert(testComparisons(s1, s2, false, true));
+    s1 = {{0, 0}, {1, 1}, {2, 2}}; // {{2,2},{1,1},{0,0}} versus {{2,0},{1,1}}
+    assert(testComparisons(s1, s2, false, false));
+    s2 = {{0, 0}, {1, 1}, {2, 3}}; // {{2,2},{1,1},{0,0}} versus {{2,3},{1,1},{0,0}}
+    assert(testComparisons(s1, s2, false, true));
+  }
+}
+
+int main(int, char**) {
+  test<std::vector<int>, std::vector<int>>();
+  test<std::deque<int>, std::deque<int>>();
+  test<MinSequenceContainer<int>, MinSequenceContainer<int>>();
+  test<std::vector<int, min_allocator<int>>, std::vector<int, min_allocator<int>>>();
+
+  // double keys/values: operator<=> yields std::partial_ordering, and NaN elements can compare unordered
+  {
+    using C = std::flat_map<double, int>;
+    C s1    = {{1, 1}};
+    C s2    = C(std::sorted_unique, {{std::numeric_limits<double>::quiet_NaN(), 2}});
+    ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::partial_ordering);
+    AssertComparisonsReturnBool<C>();
+    assert(testComparisonsComplete(s1, s2, false, false, false));
+  }
+  {
+    using C = std::flat_map<int, double>;
+    C s1    = {{1, 1}};
+    C s2    = C(std::sorted_unique, {{2, std::numeric_limits<double>::quiet_NaN()}});
+    ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::partial_ordering);
+    AssertComparisonsReturnBool<C>();
+    assert(testComparisonsComplete(s1, s2, false, true, false));
+    s2 = C(std::sorted_unique, {{1, std::numeric_limits<double>::quiet_NaN()}});
+    assert(testComparisonsComplete(s1, s2, false, false, false));
+  }
+  {
+    // Comparisons use value_type's native operators, not the comparator
+    struct StrongComp {
+      bool operator()(double a, double b) const { return std::strong_order(a, b) < 0; }
+    };
+    using C = std::flat_map<double, double, StrongComp>;
+    C s1    = {{1, 1}};
+    C s2    = {{std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()}};
+    ASSERT_SAME_TYPE(decltype(s1 <=> s2), std::partial_ordering);
+    AssertComparisonsReturnBool<C>();
+    assert(testComparisonsComplete(s1, s2, false, false, false));
+    s1 = {{{1, 1}, {std::numeric_limits<double>::quiet_NaN(), 1}}};
+    s2 = {{{std::numeric_limits<double>::quiet_NaN(), 1}, {1, 1}}};
+    assert(std::lexicographical_compare_three_way(
+               s1.keys().begin(), s1.keys().end(), s2.keys().begin(), s2.keys().end(), std::strong_order) ==
+           std::strong_ordering::equal);
+    assert(s1 != s2);
+    assert((s1 <=> s2) == std::partial_ordering::unordered);
+  }
+  return 0;
+}
diff --git a/libcxx/test/std/containers/container.adaptors/flat.map/types.compile.pass.cpp b/libcxx/test/std/containers/container.adaptors/flat.map/types.compile.pass.cpp
new file mode 100644
index 000000000000000..ea9d4d7fca67f00
--- /dev/null
+++ b/libcxx/test/std/containers/container.adaptors/flat.map/types.compile.pass.cpp
@@ -0,0 +1,133 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
+
+//  using key_type               = Key;
+//  using mapped_type            = T;
+//  using value_type             = pair<key_type, mapped_type>;
+//  using key_compare            = Compare;
+//  using reference              = pair<const key_type&, mapped_type&>;
+//  using const_reference        = pair<const key_type&, const mapped_type&>;
+//  using size_type              = size_t;
+//  using difference_type        = ptrdiff_t;
+//  using iterator               = implementation-defined; // see [container.requirements]
+//  using const_iterator         = implementation-defined; // see [container.requirements]
+//  using reverse_iterator       = std::reverse_iterator<iterator>;
+//  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+//  using key_container_type     = KeyContainer;
+//  using mapped_container_type  = MappedContainer;
+
+//  class value_compare;
+
+//  struct containers {
+//    key_container_type keys;
+//    mapped_container_type values;
+//  };
+
+#include <concepts>
+#include <deque>
+#include <flat_map>
+#include <functional>
+#include <ranges>
+#include <string>
+#include <vector>
+#include "min_allocator.h"
+
+void test() {
+  { // default comparator and default (std::vector) containers
+    using M = std::flat_map<int, double>;
+    static_assert(std::is_same_v<typename M::key_type, int>);
+    static_assert(std::is_same_v<typename M::mapped_type, double>);
+    static_assert(std::is_same_v<typename M::value_type, std::pair<int, double>>);
+    static_assert(std::is_same_v<typename M::key_compare, std::less<int>>);
+    static_assert(std::is_same_v<typename M::reference, std::pair<const int&, double&>>);
+    static_assert(std::is_same_v<typename M::const_reference, std::pair<const int&, const double&>>);
+    static_assert(std::is_same_v<typename M::size_type, size_t>);
+    static_assert(std::is_same_v<typename M::difference_type, ptrdiff_t>);
+    static_assert(requires { typename M::iterator; });
+    static_assert(requires { typename M::const_iterator; });
+    static_assert(std::is_same_v<typename M::reverse_iterator, std::reverse_iterator<typename M::iterator>>);
+    static_assert(
+        std::is_same_v<typename M::const_reverse_iterator, std::reverse_iterator<typename M::const_iterator>>);
+    static_assert(std::is_same_v<typename M::key_container_type, std::vector<int>>);
+    static_assert(std::is_same_v<typename M::mapped_container_type, std::vector<double>>);
+    static_assert(requires { typename M::value_compare; });
+    static_assert(requires { typename M::containers; });
+    static_assert(std::is_same_v<decltype(M::containers::keys), std::vector<int>>);
+    static_assert(std::is_same_v<decltype(M::containers::values), std::vector<double>>);
+  }
+
+  { // user-provided comparator and std::deque containers
+    struct A {};
+    struct Compare {
+      bool operator()(const std::string&, const std::string&) const;
+    };
+    using M = std::flat_map<std::string, A, Compare, std::deque<std::string>, std::deque<A>>;
+    static_assert(std::is_same_v<typename M::key_type, std::string>);
+    static_assert(std::is_same_v<typename M::mapped_type, A>);
+    static_assert(std::is_same_v<typename M::value_type, std::pair<std::string, A>>);
+    static_assert(std::is_same_v<typename M::key_compare, Compare>);
+    static_assert(std::is_same_v<typename M::reference, std::pair<const std::string&, A&>>);
+    static_assert(std::is_same_v<typename M::const_reference, std::pair<const std::string&, const A&>>);
+    static_assert(std::is_same_v<typename M::size_type, size_t>);
+    static_assert(std::is_same_v<typename M::difference_type, ptrdiff_t>);
+    static_assert(requires { typename M::iterator; });
+    static_assert(requires { typename M::const_iterator; });
+    static_assert(std::is_same_v<typename M::reverse_iterator, std::reverse_iterator<typename M::iterator>>);
+    static_assert(
+        std::is_same_v<typename M::const_reverse_iterator, std::reverse_iterator<typename M::const_iterator>>);
+    static_assert(std::is_same_v<typename M::key_container_type, std::deque<std::string>>);
+    static_assert(std::is_same_v<typename M::mapped_container_type, std::deque<A>>);
+    static_assert(requires { typename M::value_compare; });
+    static_assert(requires { typename M::containers; });
+    static_assert(std::is_same_v<decltype(M::containers::keys), std::deque<std::string>>);
+    static_assert(std::is_same_v<decltype(M::containers::values), std::deque<A>>);
+  }
+  { // defaults again, additionally checking that the iterators are random access
+    using C = std::flat_map<int, short>;
+    static_assert(std::is_same_v<C::key_type, int>);
+    static_assert(std::is_same_v<C::mapped_type, short>);
+    static_assert(std::is_same_v<C::value_type, std::pair<int, short>>);
+    static_assert(std::is_same_v<C::key_compare, std::less<int>>);
+    static_assert(!std::is_same_v<C::value_compare, std::less<int>>);
+    static_assert(std::is_same_v<C::reference, std::pair<const int&, short&>>);
+    static_assert(std::is_same_v<C::const_reference, std::pair<const int&, const short&>>);
+    static_assert(std::random_access_iterator<C::iterator>);
+    static_assert(std::random_access_iterator<C::const_iterator>);
+    static_assert(std::random_access_iterator<C::reverse_iterator>);
+    static_assert(std::random_access_iterator<C::const_reverse_iterator>);
+    static_assert(std::is_same_v<C::reverse_iterator, std::reverse_iterator<C::iterator>>);
+    static_assert(std::is_same_v<C::const_reverse_iterator, std::reverse_iterator<C::const_iterator>>);
+    static_assert(std::is_same_v<C::size_type, std::size_t>);
+    static_assert(std::is_same_v<C::difference_type, std::ptrdiff_t>);
+    static_assert(std::is_same_v<C::key_container_type, std::vector<int>>);
+    static_assert(std::is_same_v<C::mapped_container_type, std::vector<short>>);
+  }
+  { // custom comparator and key container; the mapped container stays the default
+    using C = std::flat_map<short, int, std::greater<long>, std::deque<short, min_allocator<short>>>;
+    static_assert(std::is_same_v<C::key_type, short>);
+    static_assert(std::is_same_v<C::mapped_type, int>);
+    static_assert(std::is_same_v<C::value_type, std::pair<short, int>>);
+    static_assert(std::is_same_v<C::key_compare, std::greater<long>>);
+    static_assert(!std::is_same_v<C::value_compare, std::greater<long>>);
+    static_assert(std::is_same_v<C::reference, std::pair<const short&, int&>>);
+    static_assert(std::is_same_v<C::const_reference, std::pair<const short&, const int&>>);
+    static_assert(std::random_access_iterator<C::iterator>);
+    static_assert(std::random_access_iterator<C::const_iterator>);
+    static_assert(std::random_access_iterator<C::reverse_iterator>);
+    static_assert(std::random_access_iterator<C::const_reverse_iterator>);
+    static_assert(std::is_same_v<C::reverse_iterator, std::reverse_iterator<C::iterator>>);
+    static_assert(std::is_same_v<C::const_reverse_iterator, std::reverse_iterator<C::const_iterator>>);
+    // size_type is invariably size_t
+    static_assert(std::is_same_v<C::size_type, std::size_t>);
+    static_assert(std::is_same_v<C::difference_type, std::ptrdiff_t>);
+    static_assert(std::is_same_v<C::key_container_type, std::deque<short, min_allocator<short>>>);
+    static_assert(std::is_same_v<C::mapped_container_type, std::vector<int>>);
+  }
+}
diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp
index 3ef5aeecc1b0c90..c9c1bac2fb4a0c9 100644
--- a/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector/vector.cons/exceptions.pass.cpp
@@ -139,6 +139,14 @@ int main(int, char**) {
   check_new_delete_called();
 #endif  // TEST_STD_VER >= 14
 
+  try { // Throw in vector(size_type, value_type, const allocator_type&) from the type
+    int throw_after = 1;
+    ThrowingT v(throw_after);
+    std::vector<ThrowingT> vec(1, v, std::allocator<ThrowingT>());
+  } catch (int) {
+  }
+  check_new_delete_called();
+
   try { // Throw in vector(InputIterator, InputIterator) from input iterator
     std::vector<int> vec((Iterator<std::input_iterator_tag>()), Iterator<std::input_iterator_tag>(2));
   } catch (int) {
diff --git a/libcxx/test/std/depr/depr.c.headers/ciso646.compile.pass.cpp b/libcxx/test/std/depr/depr.c.headers/ciso646.compile.pass.cpp
index 4dff57f84f202cd..764f4d02f44f44a 100644
--- a/libcxx/test/std/depr/depr.c.headers/ciso646.compile.pass.cpp
+++ b/libcxx/test/std/depr/depr.c.headers/ciso646.compile.pass.cpp
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-// <ciso646>
+// <ciso646> // removed in C++20, but still provided by libc++ as an extension
+
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
 
 #include <ciso646>
diff --git a/libcxx/test/std/depr/depr.c.headers/stdalign_h.compile.pass.cpp b/libcxx/test/std/depr/depr.c.headers/stdalign_h.compile.pass.cpp
new file mode 100644
index 000000000000000..e7290aab2c6616b
--- /dev/null
+++ b/libcxx/test/std/depr/depr.c.headers/stdalign_h.compile.pass.cpp
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// test <stdalign.h>
+//
+// Even though <stdalign.h> is not provided by libc++,
+// we still test that using it with libc++ on the search path will work.
+
+// TODO: GCC doesn't provide a proper <stdalign.h> for C++ until 15.
+// UNSUPPORTED: gcc
+
+#include <stdalign.h>
+
+#ifndef __alignas_is_defined
+#  error __alignas_is_defined not defined
+#endif
+
+#ifndef __alignof_is_defined
+#  error __alignof_is_defined not defined
+#endif
+
+#ifdef alignas
+#  error alignas should not be defined
+#endif
+
+#ifdef alignof
+#  error alignof should not be defined
+#endif
diff --git a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp
new file mode 100644
index 000000000000000..0eaf82ce5cef016
--- /dev/null
+++ b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <ccomplex>
+
+// check that <ccomplex> is deprecated in C++17 and removed in C++20
+// When built with modules, <ccomplex> should be omitted.
+
+// UNSUPPORTED: c++03, c++11, c++14
+// UNSUPPORTED: clang-modules-build
+
+#include "test_macros.h"
+
+#include <ccomplex>
+
+#if TEST_STD_VER >= 20
+// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: removed in C++20. Include <complex> instead.}}
+#else
+// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: Include <complex> instead.}}
+#endif
diff --git a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp
new file mode 100644
index 000000000000000..04acd1008154856
--- /dev/null
+++ b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp
@@ -0,0 +1,18 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <ciso646>
+
+// check that <ciso646> is removed in C++20
+// When built with modules, <ciso646> should be omitted.
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: clang-modules-build
+
+#include <ciso646>
+// expected-warning@ciso646:* {{'__standard_header_ciso646' is deprecated: removed in C++20. Include <version> instead.}}
diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp
new file mode 100644
index 000000000000000..dc9f1af55b3f14b
--- /dev/null
+++ b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <cstdalign>
+
+// check that <cstdalign> is deprecated in C++17 and removed in C++20
+// When built with modules, <cstdalign> should be omitted.
+
+// UNSUPPORTED: c++03, c++11, c++14
+// UNSUPPORTED: clang-modules-build
+
+#include "test_macros.h"
+
+#include <cstdalign>
+
+#if TEST_STD_VER >= 20
+// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated: removed in C++20.}}
+#else
+// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated}}
+#endif
diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp
new file mode 100644
index 000000000000000..eddefe14d35eac8
--- /dev/null
+++ b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <cstdbool>
+
+// check that <cstdbool> is deprecated in C++17 and removed in C++20
+// When built with modules, <cstdbool> should be omitted.
+
+// UNSUPPORTED: c++03, c++11, c++14
+// UNSUPPORTED: clang-modules-build
+
+#include "test_macros.h"
+
+#include <cstdbool>
+
+#if TEST_STD_VER >= 20
+// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated: removed in C++20.}}
+#else
+// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated}}
+#endif
diff --git a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp
new file mode 100644
index 000000000000000..097ab1643d15afd
--- /dev/null
+++ b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp
@@ -0,0 +1,25 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <ctgmath>
+
+// check that <ctgmath> is deprecated in C++17 and removed in C++20
+// When built with modules, <ctgmath> should be omitted.
+
+// UNSUPPORTED: c++03, c++11, c++14
+// UNSUPPORTED: clang-modules-build
+
+#include "test_macros.h"
+
+#include <ctgmath>
+
+#if TEST_STD_VER >= 20
+// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: removed in C++20. Include <cmath> and <complex> instead.}}
+#else
+// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: Include <cmath> and <complex> instead.}}
+#endif
diff --git a/libcxx/test/std/experimental/simd/simd.class/simd_unary.pass.cpp b/libcxx/test/std/experimental/simd/simd.class/simd_unary.pass.cpp
index f205ac971e5f08f..8cc9924a3cdae14 100644
--- a/libcxx/test/std/experimental/simd/simd.class/simd_unary.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.class/simd_unary.pass.cpp
@@ -16,6 +16,10 @@
 //   clang: error: unable to execute command: Illegal instruction: 4
 // XFAIL: target=x86_64-apple-macosx13.7
 
+// FIXME: The following issue occurs on Windows to Armv7 Ubuntu Linux:
+//   Assertion failed: N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type"
+// XFAIL: target=armv7-unknown-linux-gnueabihf
+
 // <experimental/simd>
 //
 // [simd.class]
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp
index f265be091f79b5f..91abbbc77837bd9 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp
@@ -19,6 +19,7 @@
     __cpp_lib_constrained_equality      202403L [C++26]
     __cpp_lib_freestanding_optional     202311L [C++26]
     __cpp_lib_optional                  201606L [C++17]
+                                        202106L [C++20]
                                         202110L [C++23]
     __cpp_lib_optional_range_support    202406L [C++26]
 */
@@ -96,8 +97,8 @@
 # ifndef __cpp_lib_optional
 #   error "__cpp_lib_optional should be defined in c++20"
 # endif
-# if __cpp_lib_optional != 201606L
-#   error "__cpp_lib_optional should have the value 201606L in c++20"
+# if __cpp_lib_optional != 202106L
+#   error "__cpp_lib_optional should have the value 202106L in c++20"
 # endif
 
 # ifdef __cpp_lib_optional_range_support
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp
index 4dcc477696bfdde..598e976bda3cf60 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp
@@ -19,6 +19,8 @@
     __cpp_lib_constrained_equality    202403L [C++26]
     __cpp_lib_freestanding_variant    202311L [C++26]
     __cpp_lib_variant                 202102L [C++17]
+                                      202106L [C++20]
+                                      202306L [C++26]
 */
 
 #include <variant>
@@ -82,8 +84,8 @@
 # ifndef __cpp_lib_variant
 #   error "__cpp_lib_variant should be defined in c++20"
 # endif
-# if __cpp_lib_variant != 202102L
-#   error "__cpp_lib_variant should have the value 202102L in c++20"
+# if __cpp_lib_variant != 202106L
+#   error "__cpp_lib_variant should have the value 202106L in c++20"
 # endif
 
 #elif TEST_STD_VER == 23
@@ -99,8 +101,8 @@
 # ifndef __cpp_lib_variant
 #   error "__cpp_lib_variant should be defined in c++23"
 # endif
-# if __cpp_lib_variant != 202102L
-#   error "__cpp_lib_variant should have the value 202102L in c++23"
+# if __cpp_lib_variant != 202106L
+#   error "__cpp_lib_variant should have the value 202106L in c++23"
 # endif
 
 #elif TEST_STD_VER > 23
@@ -134,8 +136,8 @@
 # ifndef __cpp_lib_variant
 #   error "__cpp_lib_variant should be defined in c++26"
 # endif
-# if __cpp_lib_variant != 202102L
-#   error "__cpp_lib_variant should have the value 202102L in c++26"
+# if __cpp_lib_variant != 202306L
+#   error "__cpp_lib_variant should have the value 202306L in c++26"
 # endif
 
 #endif // TEST_STD_VER > 23
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
index 0614f64a2ef04d6..5deaee16895f66c 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
@@ -158,6 +158,7 @@
     __cpp_lib_not_fn                                        201603L [C++17]
     __cpp_lib_null_iterators                                201304L [C++14]
     __cpp_lib_optional                                      201606L [C++17]
+                                                            202106L [C++20]
                                                             202110L [C++23]
     __cpp_lib_optional_range_support                        202406L [C++26]
     __cpp_lib_out_ptr                                       202106L [C++23]
@@ -244,6 +245,8 @@
     __cpp_lib_unreachable                                   202202L [C++23]
     __cpp_lib_unwrap_ref                                    201811L [C++20]
     __cpp_lib_variant                                       202102L [C++17]
+                                                            202106L [C++20]
+                                                            202306L [C++26]
     __cpp_lib_void_t                                        201411L [C++17]
 */
 
@@ -4087,8 +4090,8 @@
 # ifndef __cpp_lib_optional
 #   error "__cpp_lib_optional should be defined in c++20"
 # endif
-# if __cpp_lib_optional != 201606L
-#   error "__cpp_lib_optional should have the value 201606L in c++20"
+# if __cpp_lib_optional != 202106L
+#   error "__cpp_lib_optional should have the value 202106L in c++20"
 # endif
 
 # ifdef __cpp_lib_optional_range_support
@@ -4569,8 +4572,8 @@
 # ifndef __cpp_lib_variant
 #   error "__cpp_lib_variant should be defined in c++20"
 # endif
-# if __cpp_lib_variant != 202102L
-#   error "__cpp_lib_variant should have the value 202102L in c++20"
+# if __cpp_lib_variant != 202106L
+#   error "__cpp_lib_variant should have the value 202106L in c++20"
 # endif
 
 # ifndef __cpp_lib_void_t
@@ -6196,8 +6199,8 @@
 # ifndef __cpp_lib_variant
 #   error "__cpp_lib_variant should be defined in c++23"
 # endif
-# if __cpp_lib_variant != 202102L
-#   error "__cpp_lib_variant should have the value 202102L in c++23"
+# if __cpp_lib_variant != 202106L
+#   error "__cpp_lib_variant should have the value 202106L in c++23"
 # endif
 
 # ifndef __cpp_lib_void_t
@@ -8141,8 +8144,8 @@
 # ifndef __cpp_lib_variant
 #   error "__cpp_lib_variant should be defined in c++26"
 # endif
-# if __cpp_lib_variant != 202102L
-#   error "__cpp_lib_variant should have the value 202102L in c++26"
+# if __cpp_lib_variant != 202306L
+#   error "__cpp_lib_variant should have the value 202306L in c++26"
 # endif
 
 # ifndef __cpp_lib_void_t
diff --git a/libcxx/test/std/language.support/support.runtime/cstdalign.compile.pass.cpp b/libcxx/test/std/language.support/support.runtime/cstdalign.compile.pass.cpp
new file mode 100644
index 000000000000000..fbbaf9b2d136f94
--- /dev/null
+++ b/libcxx/test/std/language.support/support.runtime/cstdalign.compile.pass.cpp
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// test <cstdalign> // deprecated in C++17, removed in C++20, but still provided by libc++ as an extension
+
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
+#include <cstdalign>
+
+#ifndef __alignas_is_defined
+#  error __alignas_is_defined not defined
+#endif
+
+#ifndef __alignof_is_defined
+#  error __alignof_is_defined not defined
+#endif
+
+#ifdef alignas
+#  error alignas should not be defined
+#endif
+
+#ifdef alignof
+#  error alignof should not be defined
+#endif
diff --git a/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp b/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp
index 1d0e9b06a43d2eb..9a35eea507c40ad 100644
--- a/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp
+++ b/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp
@@ -6,7 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-// test <cstdbool>
+// test <cstdbool> // deprecated in C++17, removed in C++20, but still provided by libc++ as an extension
+
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
 
 #include <cstdbool>
 
diff --git a/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp b/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp
index 2c8d054fbc527df..2e4679980577a9b 100644
--- a/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp
+++ b/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp
@@ -6,7 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-// <ctgmath>
+// <ctgmath> // deprecated in C++17, removed in C++20, but still provided by libc++ as an extension
+
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
 
 #include <ctgmath>
 
diff --git a/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp b/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp
index cc3f8cd6a9beb95..0ed116c6410639c 100644
--- a/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp
+++ b/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp
@@ -6,7 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-// <ccomplex>
+// <ccomplex> // deprecated in C++17, removed in C++20, but still provided by libc++ as an extension
+
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
 
 #include <ccomplex>
 
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp
index 1203b2f3ec18f92..00f9e2b8467837d 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp
@@ -34,15 +34,32 @@ struct test_alloc {
     typedef test_alloc<U, Sz> other;
   };
 
-  TEST_CONSTEXPR_CXX14 pointer allocate(size_type n, const void* = nullptr) {
+  TEST_CONSTEXPR test_alloc() TEST_NOEXCEPT {}
+
+  template <class U>
+  TEST_CONSTEXPR test_alloc(const test_alloc<U, Sz>&) TEST_NOEXCEPT {}
+
+  pointer allocate(size_type n, const void* = nullptr) {
     allocated_ += n;
     return std::allocator<value_type>().allocate(n);
   }
 
-  TEST_CONSTEXPR_CXX14 void deallocate(pointer p, size_type s) {
+  void deallocate(pointer p, size_type s) {
     allocated_ -= s;
     std::allocator<value_type>().deallocate(p, s);
   }
+
+  template <class U>
+  friend TEST_CONSTEXPR bool operator==(const test_alloc&, const test_alloc<U, Sz>&) TEST_NOEXCEPT {
+    return true;
+  }
+
+#if TEST_STD_VER < 20
+  template <class U>
+  friend TEST_CONSTEXPR bool operator!=(const test_alloc&, const test_alloc<U, Sz>&) TEST_NOEXCEPT {
+    return false;
+  }
+#endif
 };
 
 template <class Sz>
diff --git a/libcxx/test/std/utilities/expected/types.h b/libcxx/test/std/utilities/expected/types.h
index 2b6983fb399c672..df73ebdfe495ee3 100644
--- a/libcxx/test/std/utilities/expected/types.h
+++ b/libcxx/test/std/utilities/expected/types.h
@@ -162,7 +162,7 @@ template <int Constant>
 struct TailClobberer {
   constexpr TailClobberer() noexcept {
     if (!std::is_constant_evaluated()) {
-      std::memset(this, Constant, sizeof(*this));
+      std::memset(static_cast<void*>(this), Constant, sizeof(*this));
     }
     // Always set `b` itself to `false` so that the comparison works.
     b = false;
@@ -245,7 +245,7 @@ struct BoolWithPadding {
   constexpr explicit BoolWithPadding() noexcept : BoolWithPadding(false) {}
   constexpr BoolWithPadding(bool val) noexcept {
     if (!std::is_constant_evaluated()) {
-      std::memset(this, 0, sizeof(*this));
+      std::memset(static_cast<void*>(this), 0, sizeof(*this));
     }
     val_ = val;
   }
@@ -268,7 +268,7 @@ struct IntWithoutPadding {
   constexpr explicit IntWithoutPadding() noexcept : IntWithoutPadding(0) {}
   constexpr IntWithoutPadding(int val) noexcept {
     if (!std::is_constant_evaluated()) {
-      std::memset(this, 0, sizeof(*this));
+      std::memset(static_cast<void*>(this), 0, sizeof(*this));
     }
     val_ = val;
   }
diff --git a/libcxx/test/support/MinSequenceContainer.h b/libcxx/test/support/MinSequenceContainer.h
new file mode 100644
index 000000000000000..d0e29ae40c400d3
--- /dev/null
+++ b/libcxx/test/support/MinSequenceContainer.h
@@ -0,0 +1,83 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUPPORT_MIN_SEQUENCE_CONTAINER_H
+#define SUPPORT_MIN_SEQUENCE_CONTAINER_H
+
+#include <initializer_list>
+#include <vector>
+
+#include "test_iterators.h"
+
+template <class T, class Iterator = random_access_iterator<T*>, class ConstIterator = random_access_iterator<const T*>>
+struct MinSequenceContainer {
+  using value_type      = T;
+  using difference_type = int;
+  using size_type       = unsigned int;
+  using iterator        = Iterator;
+  using const_iterator  = ConstIterator;
+
+  explicit MinSequenceContainer() = default;
+  template <class It>
+  explicit MinSequenceContainer(It first, It last) : data_(first, last) {}
+  MinSequenceContainer(std::initializer_list<T> il) : data_(il) {}
+  iterator begin() { return iterator(data_.data()); }
+  const_iterator begin() const { return const_iterator(data_.data()); }
+  const_iterator cbegin() const { return const_iterator(data_.data()); }
+  iterator end() { return begin() + size(); }
+  const_iterator end() const { return begin() + size(); }
+  size_type size() const { return data_.size(); }
+  bool empty() const { return data_.empty(); }
+
+  void clear() { data_.clear(); }
+
+  template <class It>
+  iterator insert(const_iterator p, It first, It last) {
+    return from_vector_iterator(data_.insert(to_vector_iterator(p), first, last));
+  }
+
+  iterator insert(const_iterator p, T value) {
+    return from_vector_iterator(data_.insert(to_vector_iterator(p), std::move(value)));
+  }
+
+  iterator erase(const_iterator first, const_iterator last) {
+    return from_vector_iterator(data_.erase(to_vector_iterator(first), to_vector_iterator(last)));
+  }
+
+  iterator erase(const_iterator iter) { return from_vector_iterator(data_.erase(to_vector_iterator(iter))); }
+
+  template <class... Args>
+  iterator emplace(const_iterator pos, Args&&... args) {
+    return from_vector_iterator(data_.emplace(to_vector_iterator(pos), std::forward<Args>(args)...));
+  }
+
+private:
+  std::vector<T>::const_iterator to_vector_iterator(const_iterator cit) const { return cit - cbegin() + data_.begin(); }
+
+  iterator from_vector_iterator(std::vector<T>::iterator it) { return it - data_.begin() + begin(); }
+
+  std::vector<T> data_;
+};
+
+namespace MinSequenceContainer_detail {
+
+// MinSequenceContainer is non-allocator-aware, because flat_set supports
+// such (non-STL) container types, and we want to make sure they are supported.
+template <class T>
+concept HasAllocatorType = requires { typename T::allocator_type; };
+static_assert(!HasAllocatorType<MinSequenceContainer<int>>);
+
+// MinSequenceContainer has positional emplace(pos, args...) but no .emplace(value), because we
+// want to at least somewhat support (non-STL) container types with nothing but .insert().
+template <class T>
+concept HasEmplace = requires(T& t) { t.emplace(42); };
+static_assert(!HasEmplace<MinSequenceContainer<int>>);
+
+} // namespace MinSequenceContainer_detail
+
+#endif // SUPPORT_MIN_SEQUENCE_CONTAINER_H
diff --git a/libcxx/test/support/min_allocator.h b/libcxx/test/support/min_allocator.h
index 13ee98289c36b7a..18f51f8072640d1 100644
--- a/libcxx/test/support/min_allocator.h
+++ b/libcxx/test/support/min_allocator.h
@@ -465,14 +465,14 @@ class safe_allocator {
   TEST_CONSTEXPR_CXX20 T* allocate(std::size_t n) {
     T* memory = std::allocator<T>().allocate(n);
     if (!TEST_IS_CONSTANT_EVALUATED)
-      std::memset(memory, 0, sizeof(T) * n);
+      std::memset(static_cast<void*>(memory), 0, sizeof(T) * n);
 
     return memory;
   }
 
   TEST_CONSTEXPR_CXX20 void deallocate(T* p, std::size_t n) {
     if (!TEST_IS_CONSTANT_EVALUATED)
-      DoNotOptimize(std::memset(p, 0, sizeof(T) * n));
+      DoNotOptimize(std::memset(static_cast<void*>(p), 0, sizeof(T) * n));
     std::allocator<T>().deallocate(p, n);
   }
 
diff --git a/libcxx/test/support/test_macros.h b/libcxx/test/support/test_macros.h
index 5ef14e54dae237c..1b6473b623c53b9 100644
--- a/libcxx/test/support/test_macros.h
+++ b/libcxx/test/support/test_macros.h
@@ -511,7 +511,7 @@ inline Tp const& DoNotOptimize(Tp const& value) {
 #  define TEST_CONSTEXPR_OPERATOR_NEW
 #endif
 
-#if __SIZEOF_LONG_DOUBLE__ == __SIZEOF_DOUBLE__
+#if defined(_MSC_VER) || __SIZEOF_LONG_DOUBLE__ == __SIZEOF_DOUBLE__
 #  define TEST_LONG_DOUBLE_IS_DOUBLE
 #endif
 
diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py
index 7ab1af93d177406..197d6bbc692226c 100755
--- a/libcxx/utils/generate_feature_test_macro_components.py
+++ b/libcxx/utils/generate_feature_test_macro_components.py
@@ -942,7 +942,11 @@ def add_version_header(tc):
         },
         {
             "name": "__cpp_lib_optional",
-            "values": {"c++17": 201606, "c++23": 202110},
+            "values": {
+                "c++17": 201606,
+                "c++20": 202106,  # P2231R1 Missing constexpr in std::optional and std::variant
+                "c++23": 202110,  # P0798R8 Monadic operations for std::optional + LWG3621 Remove feature-test macro __cpp_lib_monadic_optional
+            },
             "headers": ["optional"],
         },
         {
@@ -1406,8 +1410,8 @@ def add_version_header(tc):
             "name": "__cpp_lib_variant",
             "values": {
                 "c++17": 202102,  # std::visit for classes derived from std::variant
-                # "c++20": 202106,  # Fully constexpr std::variant
-                # "c++26": 202306,  # Member visit (implemented)
+                "c++20": 202106,  # P2231R1 Missing constexpr in std::optional and std::variant
+                "c++26": 202306,  # P2637R3 Member visit
             },
             "headers": ["variant"],
         },
diff --git a/libcxx/utils/libcxx/header_information.py b/libcxx/utils/libcxx/header_information.py
index 528eb9995e19f4d..cac620e4f1fe777 100644
--- a/libcxx/utils/libcxx/header_information.py
+++ b/libcxx/utils/libcxx/header_information.py
@@ -66,6 +66,7 @@ def is_cstd(self) -> bool:
             "cmath",
             "csetjmp",
             "csignal",
+            "cstdalign",
             "cstdarg",
             "cstdbool",
             "cstddef",
@@ -92,7 +93,7 @@ def has_cxx20_module(self) -> bool:
         experimental headers.
         """
         # These headers have been removed in C++20 so are never part of a module.
-        removed_in_20 = ["ccomplex", "ciso646", "cstdbool", "ctgmath"]
+        removed_in_20 = ["ccomplex", "ciso646", "cstdalign", "cstdbool", "ctgmath"]
         return self.is_public() and not self.is_experimental() and not self.is_C_compatibility() and not self._name in removed_in_20
 
     def is_cxx03_frozen_header(self) -> bool:
@@ -163,7 +164,6 @@ def __hash__(self) -> int:
 # modules will fail to build if a header is added but this list is not updated.
 headers_not_available = list(map(Header, [
     "debugging",
-    "flat_map",
     "flat_set",
     "generator",
     "hazard_pointer",
@@ -237,6 +237,15 @@ def __hash__(self) -> int:
     "wctype.h": "// UNSUPPORTED: no-wide-characters",
 }
 
+# Undeprecate headers that are deprecated in C++17 and removed in C++20.
+lit_header_undeprecations = {
+    "ccomplex": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS",
+    "ciso646": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS",
+    "cstdalign": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS",
+    "cstdbool": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS",
+    "ctgmath": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS",
+}
+
 # This table was produced manually, by grepping the TeX source of the Standard's
 # library clauses for the string "#include". Each header's synopsis contains
 # explicit "#include" directives for its mandatory inclusions.
@@ -251,6 +260,7 @@ def __hash__(self) -> int:
     "coroutine": ["compare"],
     "deque": ["compare", "initializer_list"],
     "filesystem": ["compare"],
+    "flat_map": ["compare", "initializer_list"],
     "forward_list": ["compare", "initializer_list"],
     "ios": ["iosfwd"],
     "iostream": ["ios", "istream", "ostream", "streambuf"],
diff --git a/libunwind/src/DwarfInstructions.hpp b/libunwind/src/DwarfInstructions.hpp
index bd9ece60ee5881a..e7be0d6d5d63549 100644
--- a/libunwind/src/DwarfInstructions.hpp
+++ b/libunwind/src/DwarfInstructions.hpp
@@ -74,8 +74,10 @@ class DwarfInstructions {
     __builtin_unreachable();
   }
 #if defined(_LIBUNWIND_TARGET_AARCH64)
-  static bool getRA_SIGN_STATE(A &addressSpace, R registers, pint_t cfa,
-                               PrologInfo &prolog);
+  static bool isReturnAddressSigned(A &addressSpace, R registers, pint_t cfa,
+                                    PrologInfo &prolog);
+  static bool isReturnAddressSignedWithPC(A &addressSpace, R registers,
+                                          pint_t cfa, PrologInfo &prolog);
 #endif
 };
 
@@ -173,8 +175,9 @@ v128 DwarfInstructions<A, R>::getSavedVectorRegister(
 }
 #if defined(_LIBUNWIND_TARGET_AARCH64)
 template <typename A, typename R>
-bool DwarfInstructions<A, R>::getRA_SIGN_STATE(A &addressSpace, R registers,
-                                               pint_t cfa, PrologInfo &prolog) {
+bool DwarfInstructions<A, R>::isReturnAddressSigned(A &addressSpace,
+                                                    R registers, pint_t cfa,
+                                                    PrologInfo &prolog) {
   pint_t raSignState;
   auto regloc = prolog.savedRegisters[UNW_AARCH64_RA_SIGN_STATE];
   if (regloc.location == CFI_Parser<A>::kRegisterUnused)
@@ -185,6 +188,22 @@ bool DwarfInstructions<A, R>::getRA_SIGN_STATE(A &addressSpace, R registers,
   // Only bit[0] is meaningful.
   return raSignState & 0x01;
 }
+
+template <typename A, typename R>
+bool DwarfInstructions<A, R>::isReturnAddressSignedWithPC(A &addressSpace,
+                                                          R registers,
+                                                          pint_t cfa,
+                                                          PrologInfo &prolog) {
+  pint_t raSignState;
+  auto regloc = prolog.savedRegisters[UNW_AARCH64_RA_SIGN_STATE];
+  if (regloc.location == CFI_Parser<A>::kRegisterUnused)
+    raSignState = static_cast<pint_t>(regloc.value);
+  else
+    raSignState = getSavedRegister(addressSpace, registers, cfa, regloc);
+
+  // Only bit[1] is meaningful.
+  return raSignState & 0x02;
+}
 #endif
 
 template <typename A, typename R>
@@ -288,7 +307,7 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
       // restored. autia1716 is used instead of autia as autia1716 assembles
       // to a NOP on pre-v8.3a architectures.
       if ((R::getArch() == REGISTERS_ARM64) &&
-          getRA_SIGN_STATE(addressSpace, registers, cfa, prolog) &&
+          isReturnAddressSigned(addressSpace, registers, cfa, prolog) &&
           returnAddress != 0) {
 #if !defined(_LIBUNWIND_IS_NATIVE_ONLY)
         return UNW_ECROSSRASIGNING;
@@ -296,13 +315,29 @@ int DwarfInstructions<A, R>::stepWithDwarf(A &addressSpace, pint_t pc,
         register unsigned long long x17 __asm("x17") = returnAddress;
         register unsigned long long x16 __asm("x16") = cfa;
 
-        // These are the autia1716/autib1716 instructions. The hint instructions
-        // are used here as gcc does not assemble autia1716/autib1716 for pre
-        // armv8.3a targets.
-        if (cieInfo.addressesSignedWithBKey)
-          asm("hint 0xe" : "+r"(x17) : "r"(x16)); // autib1716
-        else
-          asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716
+        // We use the hint versions of the authentication instructions below to
+        // ensure they're assembled by the compiler even for targets with no
+        // FEAT_PAuth/FEAT_PAuth_LR support.
+        if (isReturnAddressSignedWithPC(addressSpace, registers, cfa, prolog)) {
+          register unsigned long long x15 __asm("x15") =
+              prolog.ptrAuthDiversifier;
+          if (cieInfo.addressesSignedWithBKey) {
+            asm("hint 0x27\n\t" // pacm
+                "hint 0xe"
+                : "+r"(x17)
+                : "r"(x16), "r"(x15)); // autib1716
+          } else {
+            asm("hint 0x27\n\t" // pacm
+                "hint 0xc"
+                : "+r"(x17)
+                : "r"(x16), "r"(x15)); // autia1716
+          }
+        } else {
+          if (cieInfo.addressesSignedWithBKey)
+            asm("hint 0xe" : "+r"(x17) : "r"(x16)); // autib1716
+          else
+            asm("hint 0xc" : "+r"(x17) : "r"(x16)); // autia1716
+        }
         returnAddress = x17;
 #endif
       }
diff --git a/libunwind/src/DwarfParser.hpp b/libunwind/src/DwarfParser.hpp
index 0682942ce13799e..7e85025dd054d5d 100644
--- a/libunwind/src/DwarfParser.hpp
+++ b/libunwind/src/DwarfParser.hpp
@@ -91,6 +91,9 @@ class CFI_Parser {
     int64_t           cfaExpression;      // CFA = expression
     uint32_t          spExtraArgSize;
     RegisterLocation  savedRegisters[kMaxRegisterNumber + 1];
+#if defined(_LIBUNWIND_TARGET_AARCH64)
+    pint_t ptrAuthDiversifier;
+#endif
     enum class InitializeTime { kLazy, kNormal };
 
     // When saving registers, this data structure is lazily initialized.
@@ -799,6 +802,24 @@ bool CFI_Parser<A>::parseFDEInstructions(A &addressSpace,
         }
         break;
 
+#if defined(_LIBUNWIND_TARGET_AARCH64)
+      case DW_CFA_AARCH64_negate_ra_state_with_pc: {
+        int64_t value =
+            results->savedRegisters[UNW_AARCH64_RA_SIGN_STATE].value ^ 0x3;
+        results->setRegisterValue(UNW_AARCH64_RA_SIGN_STATE, value,
+                                  initialState);
+        // When calculating the value of the PC, it is assumed that the CFI
+        // instruction is placed before the signing instruction; however, it
+        // is placed after. Because of this, the CFI instruction is located
+        // one instruction later than expected, so we reduce the PC value by
+        // 4 bytes to compensate.
+        results->ptrAuthDiversifier = fdeInfo.pcStart + codeOffset - 0x4;
+        _LIBUNWIND_TRACE_DWARF(
+            "DW_CFA_AARCH64_negate_ra_state_with_pc(pc=0x%" PRIx64 ")\n",
+            static_cast<uint64_t>(results->ptrAuthDiversifier));
+      } break;
+#endif
+
 #else
         (void)arch;
 #endif
diff --git a/libunwind/src/dwarf2.h b/libunwind/src/dwarf2.h
index 174277d5a795084..68ad882347203ca 100644
--- a/libunwind/src/dwarf2.h
+++ b/libunwind/src/dwarf2.h
@@ -18,43 +18,43 @@
 
 // DWARF unwind instructions
 enum {
-  DW_CFA_nop                 = 0x0,
-  DW_CFA_set_loc             = 0x1,
-  DW_CFA_advance_loc1        = 0x2,
-  DW_CFA_advance_loc2        = 0x3,
-  DW_CFA_advance_loc4        = 0x4,
-  DW_CFA_offset_extended     = 0x5,
-  DW_CFA_restore_extended    = 0x6,
-  DW_CFA_undefined           = 0x7,
-  DW_CFA_same_value          = 0x8,
-  DW_CFA_register            = 0x9,
-  DW_CFA_remember_state      = 0xA,
-  DW_CFA_restore_state       = 0xB,
-  DW_CFA_def_cfa             = 0xC,
-  DW_CFA_def_cfa_register    = 0xD,
-  DW_CFA_def_cfa_offset      = 0xE,
-  DW_CFA_def_cfa_expression  = 0xF,
-  DW_CFA_expression         = 0x10,
+  DW_CFA_nop = 0x0,
+  DW_CFA_set_loc = 0x1,
+  DW_CFA_advance_loc1 = 0x2,
+  DW_CFA_advance_loc2 = 0x3,
+  DW_CFA_advance_loc4 = 0x4,
+  DW_CFA_offset_extended = 0x5,
+  DW_CFA_restore_extended = 0x6,
+  DW_CFA_undefined = 0x7,
+  DW_CFA_same_value = 0x8,
+  DW_CFA_register = 0x9,
+  DW_CFA_remember_state = 0xA,
+  DW_CFA_restore_state = 0xB,
+  DW_CFA_def_cfa = 0xC,
+  DW_CFA_def_cfa_register = 0xD,
+  DW_CFA_def_cfa_offset = 0xE,
+  DW_CFA_def_cfa_expression = 0xF,
+  DW_CFA_expression = 0x10,
   DW_CFA_offset_extended_sf = 0x11,
-  DW_CFA_def_cfa_sf         = 0x12,
-  DW_CFA_def_cfa_offset_sf  = 0x13,
-  DW_CFA_val_offset         = 0x14,
-  DW_CFA_val_offset_sf      = 0x15,
-  DW_CFA_val_expression     = 0x16,
-  DW_CFA_advance_loc        = 0x40, // high 2 bits are 0x1, lower 6 bits are delta
-  DW_CFA_offset             = 0x80, // high 2 bits are 0x2, lower 6 bits are register
-  DW_CFA_restore            = 0xC0, // high 2 bits are 0x3, lower 6 bits are register
+  DW_CFA_def_cfa_sf = 0x12,
+  DW_CFA_def_cfa_offset_sf = 0x13,
+  DW_CFA_val_offset = 0x14,
+  DW_CFA_val_offset_sf = 0x15,
+  DW_CFA_val_expression = 0x16,
+  DW_CFA_advance_loc = 0x40, // high 2 bits are 0x1, lower 6 bits are delta
+  DW_CFA_offset = 0x80,      // high 2 bits are 0x2, lower 6 bits are register
+  DW_CFA_restore = 0xC0,     // high 2 bits are 0x3, lower 6 bits are register
 
   // GNU extensions
-  DW_CFA_GNU_window_save              = 0x2D,
-  DW_CFA_GNU_args_size                = 0x2E,
+  DW_CFA_GNU_window_save = 0x2D,
+  DW_CFA_GNU_args_size = 0x2E,
   DW_CFA_GNU_negative_offset_extended = 0x2F,
 
   // AARCH64 extensions
-  DW_CFA_AARCH64_negate_ra_state      = 0x2D
+  DW_CFA_AARCH64_negate_ra_state_with_pc = 0x2C,
+  DW_CFA_AARCH64_negate_ra_state = 0x2D
 };
 
-
 // FSF exception handling Pointer-Encoding constants
 // Used in CFI augmentation by GCC
 enum {
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index c6986681dffe77a..33fb20ffeaf3212 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -570,8 +570,7 @@ void SectionChunk::getBaserels(std::vector<Baserel> *res) {
 // another DLL) This returns the size the relocation is supposed to update,
 // in bits, or 0 if the relocation cannot be handled as a runtime pseudo
 // relocation.
-static int getRuntimePseudoRelocSize(uint16_t type,
-                                     llvm::COFF::MachineTypes machine) {
+static int getRuntimePseudoRelocSize(uint16_t type, Triple::ArchType arch) {
   // Relocations that either contain an absolute address, or a plain
   // relative offset, since the runtime pseudo reloc implementation
   // adds 8/16/32/64 bit values to a memory address.
@@ -597,8 +596,8 @@ static int getRuntimePseudoRelocSize(uint16_t type,
   // the image, or temporarily changed at runtime with VirtualProtect.
   // Since this only operates on direct address values, it doesn't work for
   // ARM/ARM64 relocations, other than the plain ADDR32/ADDR64 relocations.
-  switch (machine) {
-  case AMD64:
+  switch (arch) {
+  case Triple::x86_64:
     switch (type) {
     case IMAGE_REL_AMD64_ADDR64:
       return 64;
@@ -613,7 +612,7 @@ static int getRuntimePseudoRelocSize(uint16_t type,
     default:
       return 0;
     }
-  case I386:
+  case Triple::x86:
     switch (type) {
     case IMAGE_REL_I386_DIR32:
     case IMAGE_REL_I386_REL32:
@@ -621,14 +620,14 @@ static int getRuntimePseudoRelocSize(uint16_t type,
     default:
       return 0;
     }
-  case ARMNT:
+  case Triple::thumb:
     switch (type) {
     case IMAGE_REL_ARM_ADDR32:
       return 32;
     default:
       return 0;
     }
-  case ARM64:
+  case Triple::aarch64:
     switch (type) {
     case IMAGE_REL_ARM64_ADDR64:
       return 64;
@@ -661,8 +660,7 @@ void SectionChunk::getRuntimePseudoRelocs(
     // alive. Thus such dangling references in DWARF sections are expected.
     if (!target->getChunk())
       continue;
-    int sizeInBits =
-        getRuntimePseudoRelocSize(rel.Type, file->ctx.config.machine);
+    int sizeInBits = getRuntimePseudoRelocSize(rel.Type, getArch());
     if (sizeInBits == 0) {
       error("unable to automatically import from " + target->getName() +
             " with relocation type " +
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index e7f768789271fac..08c1476a595f644 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -2518,9 +2518,19 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
         Symbol *sym = ctx.symtab.find(from);
         if (!sym)
           continue;
-        if (auto *u = dyn_cast<Undefined>(sym))
-          if (!u->weakAlias)
-            u->setWeakAlias(ctx.symtab.addUndefined(to));
+        if (auto *u = dyn_cast<Undefined>(sym)) {
+          if (u->weakAlias) {
+            // On ARM64EC, anti-dependency aliases are treated as undefined
+            // symbols unless a demangled symbol aliases a defined one, which is
+            // part of the implementation.
+            if (!isArm64EC(ctx.config.machine) || !u->isAntiDep)
+              continue;
+            if (!isa<Undefined>(u->weakAlias) &&
+                !isArm64ECMangledFunctionName(u->getName()))
+              continue;
+          }
+          u->setWeakAlias(ctx.symtab.addUndefined(to));
+        }
       }
 
       // If any inputs are bitcode files, the LTO code generator may create
diff --git a/lld/ELF/Arch/ARM.cpp b/lld/ELF/Arch/ARM.cpp
index be3f80337aae71c..271e2e27a269494 100644
--- a/lld/ELF/Arch/ARM.cpp
+++ b/lld/ELF/Arch/ARM.cpp
@@ -1207,7 +1207,7 @@ template <class ELFT> void ObjFile<ELFT>::importCmseSymbols() {
     Defined *sym = reinterpret_cast<Defined *>(make<SymbolUnion>());
 
     // Initialize symbol fields.
-    memset(sym, 0, sizeof(Symbol));
+    memset(static_cast<void *>(sym), 0, sizeof(Symbol));
     sym->setName(CHECK(eSyms[i].getName(stringTable), this));
     sym->value = eSym.st_value;
     sym->size = eSym.st_size;
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index c80c4017d3512cf..ebe772042642106 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -720,7 +720,7 @@ def: J<"plugin-opt=opt-remarks-format=">,
   HelpText<"Alias for --opt-remarks-format">;
 def: F<"plugin-opt=opt-remarks-with-hotness">,
   Alias<opt_remarks_with_hotness>,
-  HelpText<"Alias for --opt-remarks-with_hotness">;
+  HelpText<"Alias for --opt-remarks-with-hotness">;
 def: J<"plugin-opt=opt-remarks-hotness-threshold=">,
   Alias<opt_remarks_hotness_threshold>,
   HelpText<"Alias for --opt-remarks-hotness-threshold">;
diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp
index 674b1ef983f8430..8878acdc43e87ff 100644
--- a/lld/ELF/SymbolTable.cpp
+++ b/lld/ELF/SymbolTable.cpp
@@ -56,7 +56,7 @@ void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) {
   // alias for sym, but that could degrade the user experience of some tools
   // that can print out only one symbol for each location: sym is a preferred
   // name than real, but they might print out real instead.
-  memcpy(real, sym, sizeof(SymbolUnion));
+  memcpy(static_cast<void *>(real), sym, sizeof(SymbolUnion));
   real->isUsedInRegularObj = false;
 }
 
@@ -87,7 +87,7 @@ Symbol *SymbolTable::insert(StringRef name) {
   symVector.push_back(sym);
 
   // *sym was not initialized by a constructor. Initialize all Symbol fields.
-  memset(sym, 0, sizeof(Symbol));
+  memset(static_cast<void *>(sym), 0, sizeof(Symbol));
   sym->setName(name);
   sym->partition = 1;
   sym->versionId = VER_NDX_GLOBAL;
diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
index 339f32e05f16254..8c9c9a56cfbc720 100644
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -75,7 +75,7 @@ class Symbol {
 
   // The default copy constructor is deleted due to atomic flags. Define one for
   // places where no atomic is needed.
-  Symbol(const Symbol &o) { memcpy(this, &o, sizeof(o)); }
+  Symbol(const Symbol &o) { memcpy(static_cast<void *>(this), &o, sizeof(o)); }
 
 protected:
   const char *nameData;
diff --git a/lld/MachO/BPSectionOrderer.cpp b/lld/MachO/BPSectionOrderer.cpp
index 07b44d48d659328..5db2242a35ef286 100644
--- a/lld/MachO/BPSectionOrderer.cpp
+++ b/lld/MachO/BPSectionOrderer.cpp
@@ -53,11 +53,8 @@ getRelocHash(const Reloc &reloc,
     kind = ("Section " + Twine(static_cast<uint8_t>(isec->kind()))).str();
   if (auto *sym = reloc.referent.dyn_cast<Symbol *>()) {
     kind += (" Symbol " + Twine(static_cast<uint8_t>(sym->kind()))).str();
-    if (auto *d = dyn_cast<Defined>(sym)) {
-      if (isa_and_nonnull<CStringInputSection>(isec))
-        return getRelocHash(kind, 0, isec->getOffset(d->value), reloc.addend);
+    if (auto *d = dyn_cast<Defined>(sym))
       return getRelocHash(kind, sectionIdx.value_or(0), d->value, reloc.addend);
-    }
   }
   return getRelocHash(kind, sectionIdx.value_or(0), 0, reloc.addend);
 }
diff --git a/lld/test/COFF/arm64ec-altnames.s b/lld/test/COFF/arm64ec-altnames.s
new file mode 100644
index 000000000000000..fb28ae15895f989
--- /dev/null
+++ b/lld/test/COFF/arm64ec-altnames.s
@@ -0,0 +1,109 @@
+REQUIRES: aarch64
+RUN: split-file %s %t.dir && cd %t.dir
+
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows ext.s -o ext.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows impl.s -o impl.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows impl-cpp.s -o impl-cpp.obj
+RUN: llvm-mc -filetype=obj -triple=arm64ec-windows %S/Inputs/loadconfig-arm64ec.s -o loadconfig.obj
+
+# Ensure -alternatename can change a mangled function symbol aliasing a defined symbol (typically a guest exit thunk).
+
+RUN: lld-link -machine:arm64ec -dll -noentry -out:out1.dll ext.obj loadconfig.obj "-alternatename:#func=altsym"
+
+RUN: llvm-objdump -d out1.dll | FileCheck --check-prefix=DISASM %s
+DISASM:      0000000180001000 <.text>:
+DISASM-NEXT: 180001000: 52800020     mov     w0, #0x1                // =1
+DISASM-NEXT: 180001004: d65f03c0     ret
+DISASM-NOT: .thnk
+
+RUN: llvm-readobj --hex-dump=.test out1.dll | FileCheck --check-prefix=TESTSEC %s
+TESTSEC: 0x180004000 00100000 00100000
+
+# Ensure -alternatename can change a demangled function symbol aliasing an anti-dependency symbol.
+
+RUN: lld-link -machine:arm64ec -dll -noentry -out:out2.dll ext.obj loadconfig.obj -alternatename:func=altsym
+
+RUN: llvm-objdump -d out2.dll | FileCheck --check-prefix=DISASM2 %s
+DISASM2:      Disassembly of section .text:
+DISASM2-EMPTY:
+DISASM2-NEXT: 0000000180001000 <.text>:
+DISASM2-NEXT: 180001000: 52800020     mov     w0, #0x1                // =1
+DISASM2-NEXT: 180001004: d65f03c0     ret
+DISASM2-EMPTY:
+DISASM2-NEXT: Disassembly of section .thnk:
+DISASM2-EMPTY:
+DISASM2-NEXT: 0000000180005000 <.thnk>:
+DISASM2-NEXT: 180005000: 52800040     mov     w0, #0x2                // =2
+DISASM2-NEXT: 180005004: d65f03c0     ret
+
+RUN: llvm-readobj --hex-dump=.test out2.dll | FileCheck --check-prefix=TESTSEC2 %s
+TESTSEC2: 0x180004000 00100000 00500000
+
+# Ensure -alternatename cannot modify a demangled function symbol aliasing a defined symbol.
+
+RUN: lld-link -machine:arm64ec -dll -noentry -out:out3.dll impl.obj loadconfig.obj -alternatename:func=altsym
+RUN: llvm-objdump -d out3.dll | FileCheck --check-prefix=DISASM %s
+RUN: llvm-readobj --hex-dump=.test out3.dll | FileCheck --check-prefix=TESTSEC %s
+
+RUN: lld-link -machine:arm64ec -dll -noentry -out:out4.dll impl-cpp.obj loadconfig.obj -alternatename:func=altsym
+RUN: llvm-objdump -d out4.dll | FileCheck --check-prefix=DISASM %s
+RUN: llvm-readobj --hex-dump=.test out4.dll | FileCheck --check-prefix=TESTSEC %s
+
+#--- ext.s
+        .weak_anti_dep func
+.set func, "#func"
+        .weak_anti_dep "#func"
+.set "#func", thunksym
+
+        .section .test, "r"
+        .rva func
+        .rva "#func"
+
+        .section .thnk,"xr",discard,thunksym
+thunksym:
+        mov w0, #2
+        ret
+
+        .section .text,"xr",discard,altsym
+        .globl altsym
+altsym:
+        mov w0, #1
+        ret
+
+#--- impl.s
+        .weak_anti_dep func
+.set func, "#func"
+
+        .section .test, "r"
+        .rva func
+        .rva "#func"
+
+        .section .text,"xr",discard,"#func"
+"#func":
+        mov w0, #1
+        ret
+
+        .section .text,"xr",discard,altsym
+        .globl altsym
+altsym:
+        mov w0, #2
+        ret
+
+#--- impl-cpp.s
+        .weak_anti_dep func
+.set func, "?func@@$$hYAXXZ"
+
+        .section .test, "r"
+        .rva func
+        .rva "?func@@$$hYAXXZ"
+
+        .section .text,"xr",discard,"?func@@$$hYAXXZ"
+"?func@@$$hYAXXZ":
+        mov w0, #1
+        ret
+
+        .section .text,"xr",discard,altsym
+        .globl altsym
+altsym:
+        mov w0, #2
+        ret
diff --git a/lld/test/COFF/autoimport-arm64ec-data.test b/lld/test/COFF/autoimport-arm64ec-data.test
new file mode 100644
index 000000000000000..4d71b55f651a654
--- /dev/null
+++ b/lld/test/COFF/autoimport-arm64ec-data.test
@@ -0,0 +1,56 @@
+# REQUIRES: aarch64, x86
+RUN: split-file %s %t.dir && cd %t.dir
+
+RUN: llvm-lib -machine:arm64ec -out:libtest.a -def:test.def
+RUN: llvm-mc -triple=arm64ec-windows-gnu arm64ec.s -filetype=obj -o arm64ec.obj
+RUN: llvm-mc -triple=arm64ec-windows-gnu x86_64.s -filetype=obj -o x86_64.obj
+
+RUN: lld-link -machine:arm64ec -out:out.dll -dll -noentry x86_64.obj arm64ec.obj libtest.a -lldmingw
+
+RUN: llvm-readobj --coff-imports out.dll | FileCheck -check-prefix=IMPORTS %s
+RUN: llvm-objdump -s out.dll | FileCheck --check-prefix=CONTENTS %s
+
+IMPORTS:      Import {
+IMPORTS-NEXT:   Name: test.dll
+IMPORTS-NEXT:   ImportLookupTableRVA: 0x40E0
+IMPORTS-NEXT:   ImportAddressTableRVA: 0x3000
+IMPORTS-NEXT:   Symbol: variable (0)
+IMPORTS-NEXT: }
+
+Runtime pseudo relocation list header at 0x401c, consisting of 0x0, 0x0, 0x1.
+The first runtime pseudo relocation is from an x86_64 object file, with import
+from 0x3000, applied at 0x7000 with a size of 64 bits. The second pseudo
+relocation is from an ARM64EC object file, with import from 0x3000, applied
+at 0x7008 with a size of 64 bits.
+
+CONTENTS: Contents of section .rdata:
+CONTENTS:  180004010 00200000 10200000 00200000 00000000
+CONTENTS:  180004020 00000000 01000000 00300000 00700000
+CONTENTS:  180004030 40000000 00300000 08700000 40000000
+
+CONTENTS:      Contents of section .test:
+CONTENTS-NEXT:  180007000 00300080 01000000 00300080 01000000
+CONTENTS-NEXT:  180007010 1c400080 01000000 40400080 01000000
+
+#--- arm64ec.s
+    .text
+    .global "#_pei386_runtime_relocator"
+"#_pei386_runtime_relocator":
+    ret
+
+    .weak_anti_dep _pei386_runtime_relocator
+.set _pei386_runtime_relocator,"#_pei386_runtime_relocator"
+
+    .section .test,"dr"
+    .quad variable
+    .quad __RUNTIME_PSEUDO_RELOC_LIST__
+    .quad __RUNTIME_PSEUDO_RELOC_LIST_END__
+
+#--- x86_64.s
+    .section .test,"dr"
+    .quad variable
+
+#--- test.def
+LIBRARY test.dll
+EXPORTS
+    variable DATA
diff --git a/lld/test/wasm/custom-section-name.ll b/lld/test/wasm/custom-section-name.ll
index b860ef5a83e8364..8799fbf36056d1d 100644
--- a/lld/test/wasm/custom-section-name.ll
+++ b/lld/test/wasm/custom-section-name.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=obj %s -o %t.o
+; RUN: llc -filetype=obj -mattr=-bulk-memory %s -o %t.o
 ; RUN: wasm-ld -no-gc-sections --no-entry -o %t.wasm %t.o
 ; RUN: obj2yaml %t.wasm | FileCheck %s --check-prefixes=CHECK,NO-BSS
 ; RUN: wasm-ld -no-gc-sections --no-entry --import-memory -o %t.bss.wasm %t.o
diff --git a/lld/test/wasm/data-segments.ll b/lld/test/wasm/data-segments.ll
index 670ac3c1f373faf..41868a0b2b50b6f 100644
--- a/lld/test/wasm/data-segments.ll
+++ b/lld/test/wasm/data-segments.ll
@@ -1,4 +1,4 @@
-; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t.atomics.o -mattr=+atomics
+; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t.atomics.o -mattr=+atomics,-bulk-memory
 ; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t.bulk-mem.o -mattr=+bulk-memory
 ; RUN: llc --mtriple=wasm64-unknown-unknown -filetype=obj %s -o %t.bulk-mem64.o -mattr=+bulk-memory
 ; RUN: llc --mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t.atomics.bulk-mem.o -mattr=+atomics,+bulk-memory
diff --git a/lld/test/wasm/lto/Inputs/libcall-archive.ll b/lld/test/wasm/lto/Inputs/libcall-archive.ll
index 9d05efdeae0806e..7d8c34196dfe49a 100644
--- a/lld/test/wasm/lto/Inputs/libcall-archive.ll
+++ b/lld/test/wasm/lto/Inputs/libcall-archive.ll
@@ -1,6 +1,8 @@
 target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
 target triple = "wasm32-unknown-unknown"
 
-define void @memcpy() {
+define void @memcpy() #0 {
   ret void
 }
+
+attributes #0 = { "target-features"="-bulk-memory" }
diff --git a/lld/test/wasm/lto/libcall-archive.ll b/lld/test/wasm/lto/libcall-archive.ll
index 2f785b98976ec88..5c46d2f7ed78381 100644
--- a/lld/test/wasm/lto/libcall-archive.ll
+++ b/lld/test/wasm/lto/libcall-archive.ll
@@ -8,7 +8,7 @@
 target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128-ni:1:10:20"
 target triple = "wasm32-unknown-unknown"
 
-define void @_start(ptr %a, ptr %b) {
+define void @_start(ptr %a, ptr %b) #0 {
 entry:
   call void @llvm.memcpy.p0.p0.i64(ptr %a, ptr %b, i64 1024, i1 false)
   ret void
@@ -16,6 +16,8 @@ entry:
 
 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1)
 
+attributes #0 = { "target-features"="-bulk-memory" }
+
 ; CHECK:       - Type:            CUSTOM
 ; CHECK-NEXT:    Name:            name
 ; CHECK-NEXT:    FunctionNames:
diff --git a/lld/test/wasm/lto/stub-library-libcall.s b/lld/test/wasm/lto/stub-library-libcall.s
index ce88a32dd99dc7b..d65983c0cf5bf52 100644
--- a/lld/test/wasm/lto/stub-library-libcall.s
+++ b/lld/test/wasm/lto/stub-library-libcall.s
@@ -2,7 +2,7 @@
 # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t_main.o %t/main.s
 # RUN: llvm-as %S/Inputs/foo.ll -o %t_foo.o
 # RUN: llvm-as %S/Inputs/libcall.ll -o %t_libcall.o
-# RUN: wasm-ld %t_main.o %t_libcall.o %t_foo.o %p/Inputs/stub.so -o %t.wasm
+# RUN: wasm-ld -mllvm -mattr=-bulk-memory %t_main.o %t_libcall.o %t_foo.o %p/Inputs/stub.so -o %t.wasm
 # RUN: obj2yaml %t.wasm | FileCheck %s
 
 # The function `func_with_libcall` will generate an undefined reference to
@@ -12,7 +12,7 @@
 # If %t_foo.o is not included in the link we get an undefined symbol reported
 # to the dependency of memcpy on the foo export:
 
-# RUN: not wasm-ld %t_main.o %t_libcall.o %p/Inputs/stub.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING %s
+# RUN: not wasm-ld -mllvm -mattr=-bulk-memory %t_main.o %t_libcall.o %p/Inputs/stub.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING %s
 # MISSING: stub.so: undefined symbol: foo. Required by memcpy
 
 #--- main.s
diff --git a/lldb/docs/resources/test.rst b/lldb/docs/resources/test.rst
index 715d3772fe2787c..5f1bd0d5738305e 100644
--- a/lldb/docs/resources/test.rst
+++ b/lldb/docs/resources/test.rst
@@ -418,8 +418,8 @@ An overview of all LLDB builders can be found here:
 `https://lab.llvm.org/buildbot/#/builders?tags=lldb <https://lab.llvm.org/buildbot/#/builders?tags=lldb>`_
 
 Building and testing for macOS uses a different platform called GreenDragon. It
-has a dedicated tab for LLDB: `https://green.lab.llvm.org/green/view/LLDB/
-<https://green.lab.llvm.org/green/view/LLDB/>`_
+has a dedicated tab for LLDB: `https://green.lab.llvm.org/job/llvm.org/view/LLDB/
+<https://green.lab.llvm.org/job/llvm.org/view/LLDB/>`_
 
 
 Running The Tests
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
index 63748a71f1122d8..c29992ce9c7848e 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py
@@ -793,8 +793,6 @@ def request_launch(
             args_dict["env"] = env
         if stopOnEntry:
             args_dict["stopOnEntry"] = stopOnEntry
-        if disableASLR:
-            args_dict["disableASLR"] = disableASLR
         if disableSTDIO:
             args_dict["disableSTDIO"] = disableSTDIO
         if shellExpandArguments:
@@ -829,6 +827,7 @@ def request_launch(
         if customThreadFormat:
             args_dict["customThreadFormat"] = customThreadFormat
 
+        args_dict["disableASLR"] = disableASLR
         args_dict["enableAutoVariableSummaries"] = enableAutoVariableSummaries
         args_dict["enableSyntheticChildDebugging"] = enableSyntheticChildDebugging
         args_dict["displayExtendedBacktrace"] = displayExtendedBacktrace
diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
index 7e80912be446423..a25466f07fa557f 100644
--- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
+++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py
@@ -367,7 +367,7 @@ def launch(
         cwd=None,
         env=None,
         stopOnEntry=False,
-        disableASLR=True,
+        disableASLR=False,
         disableSTDIO=False,
         shellExpandArguments=False,
         trace=False,
@@ -451,7 +451,7 @@ def build_and_launch(
         cwd=None,
         env=None,
         stopOnEntry=False,
-        disableASLR=True,
+        disableASLR=False,
         disableSTDIO=False,
         shellExpandArguments=False,
         trace=False,
diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp
index fd5b49946c6a927..27a9edeef4249e0 100644
--- a/lldb/source/Core/SourceManager.cpp
+++ b/lldb/source/Core/SourceManager.cpp
@@ -430,7 +430,7 @@ SourceManager::GetDefaultFileAndLine() {
             false; // Force it to be a debug symbol.
         function_options.include_inlines = true;
         executable_ptr->FindFunctions(main_name, CompilerDeclContext(),
-                                      lldb::eFunctionNameTypeBase,
+                                      lldb::eFunctionNameTypeFull,
                                       function_options, sc_list);
         for (const SymbolContext &sc : sc_list) {
           if (sc.function) {
diff --git a/lldb/source/Host/common/FileAction.cpp b/lldb/source/Host/common/FileAction.cpp
index f980d3224640e07..e1c3e14a165ea97 100644
--- a/lldb/source/Host/common/FileAction.cpp
+++ b/lldb/source/Host/common/FileAction.cpp
@@ -41,7 +41,7 @@ bool FileAction::Open(int fd, const FileSpec &file_spec, bool read,
     else if (read)
       m_arg = O_NOCTTY | O_RDONLY;
     else
-      m_arg = O_NOCTTY | O_CREAT | O_WRONLY;
+      m_arg = O_NOCTTY | O_CREAT | O_WRONLY | O_TRUNC;
     m_file_spec = file_spec;
     return true;
   } else {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp
index dee90804c525840..c18edd10b968199 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp
@@ -151,3 +151,19 @@ bool DWARFIndex::ProcessTypeDIEMatchQuery(
     return true;
   return callback(die);
 }
+
+void DWARFIndex::GetNamespacesWithParents(
+    ConstString name, const CompilerDeclContext &parent_decl_ctx,
+    llvm::function_ref<bool(DWARFDIE die)> callback) {
+  GetNamespaces(name, [&](DWARFDIE ns_die) {
+    return ProcessNamespaceDieMatchParents(parent_decl_ctx, ns_die, callback);
+  });
+}
+
+bool DWARFIndex::ProcessNamespaceDieMatchParents(
+    const CompilerDeclContext &parent_decl_ctx, DWARFDIE die,
+    llvm::function_ref<bool(DWARFDIE die)> callback) {
+  // DIEs outside parent_decl_ctx yield true without invoking the callback.
+  return !SymbolFileDWARF::DIEInDeclContext(parent_decl_ctx, die) ||
+         callback(die);
+}
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h
index fea3a4fd697389f..ac1f75e91c21954 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h
@@ -71,6 +71,14 @@ class DWARFIndex {
   virtual void
   GetTypesWithQuery(TypeQuery &query,
                     llvm::function_ref<bool(DWARFDIE die)> callback);
+  /// Get namespace DIEs whose base name match \param name with \param
+  /// parent_decl_ctx in its decl parent chain.  A base implementation
+  /// is provided. Specializations should override this if they are able to
+  /// provide a faster implementation.
+  virtual void
+  GetNamespacesWithParents(ConstString name,
+                           const CompilerDeclContext &parent_decl_ctx,
+                           llvm::function_ref<bool(DWARFDIE die)> callback);
   virtual void
   GetFunctions(const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf,
                const CompilerDeclContext &parent_decl_ctx,
@@ -127,6 +135,9 @@ class DWARFIndex {
   bool
   ProcessTypeDIEMatchQuery(TypeQuery &query, DWARFDIE die,
                            llvm::function_ref<bool(DWARFDIE die)> callback);
+  bool ProcessNamespaceDieMatchParents(
+      const CompilerDeclContext &parent_decl_ctx, DWARFDIE die,
+      llvm::function_ref<bool(DWARFDIE die)> callback);
 };
 } // namespace dwarf
 } // namespace lldb_private::plugin
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp
index c809e5ff7f85353..6f2cb455ec00e15 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp
@@ -368,9 +368,10 @@ void DebugNamesDWARFIndex::GetFullyQualifiedType(
       continue;
     }
 
-    if (SameParentChain(parent_names, *parent_chain) &&
-        !ProcessEntry(entry, callback))
-      return;
+    if (SameParentChain(parent_names, *parent_chain)) {
+      if (!ProcessEntry(entry, callback))
+        return;
+    }
   }
   m_fallback.GetFullyQualifiedType(context, callback);
 }
@@ -554,17 +555,60 @@ void DebugNamesDWARFIndex::GetTypesWithQuery(
       continue;
     }
 
-    if (WithinParentChain(parent_contexts, *parent_chain) &&
-        !ProcessEntry(entry, [&](DWARFDIE die) {
-          // After .debug_names filtering still sending to base class for
-          // further filtering before calling the callback.
-          return ProcessTypeDIEMatchQuery(query, die, callback);
-        }))
-      return;
+    if (WithinParentChain(parent_contexts, *parent_chain)) {
+      if (!ProcessEntry(entry, [&](DWARFDIE die) {
+            // After .debug_names filtering still sending to base class for
+            // further filtering before calling the callback.
+            return ProcessTypeDIEMatchQuery(query, die, callback);
+          }))
+        // If the callback returns false, we're done.
+        return;
+    }
   }
   m_fallback.GetTypesWithQuery(query, callback);
 }
 
+void DebugNamesDWARFIndex::GetNamespacesWithParents(
+    ConstString name, const CompilerDeclContext &parent_decl_ctx,
+    llvm::function_ref<bool(DWARFDIE die)> callback) {
+  std::vector<lldb_private::CompilerContext> parent_contexts =
+      parent_decl_ctx.GetCompilerContext();
+  llvm::SmallVector<CompilerContext> parent_named_contexts;
+  std::copy_if(parent_contexts.rbegin(), parent_contexts.rend(),
+               std::back_inserter(parent_named_contexts),
+               [](const CompilerContext &ctx) { return !ctx.name.IsEmpty(); });
+  for (const DebugNames::Entry &entry :
+       m_debug_names_up->equal_range(name.GetStringRef())) {
+    lldb_private::dwarf::Tag entry_tag = entry.tag();
+    if (entry_tag == DW_TAG_namespace ||
+        entry_tag == DW_TAG_imported_declaration) {
+      std::optional<llvm::SmallVector<Entry, 4>> parent_chain =
+          getParentChain(entry);
+      if (!parent_chain) {
+        // No parent chain for this entry: rely on the base class filter.
+        if (!ProcessEntry(entry, [&](DWARFDIE die) {
+              return ProcessNamespaceDieMatchParents(parent_decl_ctx, die,
+                                                     callback);
+            }))
+          return;
+        continue;
+      }
+
+      if (WithinParentChain(parent_named_contexts, *parent_chain)) {
+        if (!ProcessEntry(entry, [&](DWARFDIE die) {
+              // Even after .debug_names filtering, defer to the base class for
+              // further filtering before calling the callback.
+              return ProcessNamespaceDieMatchParents(parent_decl_ctx, die,
+                                                     callback);
+            }))
+          // ProcessEntry returned false: stop without consulting m_fallback.
+          return;
+      }
+    }
+  }
+  m_fallback.GetNamespacesWithParents(name, parent_decl_ctx, callback);
+}
+
 void DebugNamesDWARFIndex::GetFunctions(
     const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf,
     const CompilerDeclContext &parent_decl_ctx,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h
index 074f68a8c55963d..ab6cde12623f6ab 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h
@@ -55,7 +55,9 @@ class DebugNamesDWARFIndex : public DWARFIndex {
   void
   GetTypesWithQuery(TypeQuery &query,
                     llvm::function_ref<bool(DWARFDIE die)> callback) override;
-
+  void GetNamespacesWithParents(
+      ConstString name, const CompilerDeclContext &parent_decl_ctx,
+      llvm::function_ref<bool(DWARFDIE die)> callback) override;
   void GetFunctions(const Module::LookupInfo &lookup_info,
                     SymbolFileDWARF &dwarf,
                     const CompilerDeclContext &parent_decl_ctx,
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index e5b8eee8d08c246..f23f8cc3d781d03 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -2900,7 +2900,7 @@ SymbolFileDWARF::FindNamespace(ConstString name,
   if (!DeclContextMatchesThisSymbolFile(parent_decl_ctx))
     return namespace_decl_ctx;
 
-  m_index->GetNamespaces(name, [&](DWARFDIE die) {
+  m_index->GetNamespacesWithParents(name, parent_decl_ctx, [&](DWARFDIE die) {
     if (!DIEInDeclContext(parent_decl_ctx, die, only_root_namespaces))
       return true; // The containing decl contexts don't match
 
diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
index 49edd40544e32ab..1a680d80a9d3d71 100644
--- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
@@ -461,8 +461,7 @@ size_t UnwindAssemblyInstEmulation::WriteMemory(
 
     if (reg_num != LLDB_INVALID_REGNUM &&
         generic_regnum != LLDB_REGNUM_GENERIC_SP) {
-      if (m_pushed_regs.find(reg_num) == m_pushed_regs.end()) {
-        m_pushed_regs[reg_num] = addr;
+      if (m_pushed_regs.try_emplace(reg_num, addr).second) {
         const int32_t offset = addr - m_initial_sp;
         m_curr_row->SetRegisterLocationToAtCFAPlusOffset(reg_num, offset,
                                                          /*can_replace=*/true);
@@ -608,8 +607,8 @@ bool UnwindAssemblyInstEmulation::WriteRegister(
         generic_regnum != LLDB_REGNUM_GENERIC_SP) {
       switch (context.GetInfoType()) {
       case EmulateInstruction::eInfoTypeAddress:
-        if (m_pushed_regs.find(reg_num) != m_pushed_regs.end() &&
-            context.info.address == m_pushed_regs[reg_num]) {
+        if (auto it = m_pushed_regs.find(reg_num);
+            it != m_pushed_regs.end() && context.info.address == it->second) {
           m_curr_row->SetRegisterLocationToSame(reg_num,
                                                 false /*must_replace*/);
           m_curr_row_modified = true;
diff --git a/lldb/test/API/commands/settings/TestSettings.py b/lldb/test/API/commands/settings/TestSettings.py
index 385acceb7a8b5c1..2dd813f6b155b36 100644
--- a/lldb/test/API/commands/settings/TestSettings.py
+++ b/lldb/test/API/commands/settings/TestSettings.py
@@ -528,6 +528,59 @@ def test_set_error_output_path(self):
             output, exe=False, startstr="This message should go to standard out."
         )
 
+    @skipIfDarwinEmbedded  # <rdar://problem/34446098> debugserver on ios etc can't write files
+    def test_same_error_output_path(self):
+        """Test that setting target.error-path and target.output-path to the same file path for the launched process works."""
+        self.build()
+
+        exe = self.getBuildArtifact("a.out")
+        self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
+
+        # Set the error-path and output-path and verify both are set.
+        self.runCmd(
+            "settings set target.error-path '{0}'".format(
+                lldbutil.append_to_process_working_directory(self, "output.txt")
+            )
+        )
+        self.runCmd(
+            "settings set target.output-path '{0}'".format(
+                lldbutil.append_to_process_working_directory(self, "output.txt")
+            )
+        )
+        # And add hooks to restore the original settings during tearDown().
+        self.addTearDownHook(lambda: self.runCmd("settings clear target.output-path"))
+        self.addTearDownHook(lambda: self.runCmd("settings clear target.error-path"))
+
+        self.expect(
+            "settings show target.error-path",
+            SETTING_MSG("target.error-path"),
+            substrs=["target.error-path (file)", 'output.txt"'],
+        )
+
+        self.expect(
+            "settings show target.output-path",
+            SETTING_MSG("target.output-path"),
+            substrs=["target.output-path (file)", 'output.txt"'],
+        )
+
+        self.runCmd(
+            "process launch --working-dir '{0}'".format(
+                self.get_process_working_directory()
+            ),
+            RUN_SUCCEEDED,
+        )
+
+        output = lldbutil.read_file_from_process_wd(self, "output.txt")
+        err_message = "This message should go to standard error."
+        out_message = "This message should go to standard out."
+        # Only the stdout message is expected in the file; stderr's must be absent.
+        self.expect(output, exe=False, substrs=[out_message])
+        self.assertNotIn(
+            err_message,
+            output,
+            "Race condition when both stderr/stdout redirects to the same file",
+        )
+
     def test_print_dictionary_setting(self):
         self.runCmd("settings clear target.env-vars")
         self.runCmd('settings set target.env-vars ["MY_VAR"]=some-value')
diff --git a/lldb/test/API/python_api/process/io/TestProcessIO.py b/lldb/test/API/python_api/process/io/TestProcessIO.py
index 5bb91d2758312df..3b5c7c48c51f4df 100644
--- a/lldb/test/API/python_api/process/io/TestProcessIO.py
+++ b/lldb/test/API/python_api/process/io/TestProcessIO.py
@@ -95,6 +95,36 @@ def test_stdout_stderr_redirection(self):
         error = self.read_error_file_and_delete()
         self.check_process_output(output, error)
 
+    @skipIfWindows  # stdio manipulation unsupported on Windows
+    @expectedFlakeyLinux(bugnumber="llvm.org/pr26437")
+    @skipIfDarwinEmbedded  # debugserver can't create/write files on the device
+    def test_stdout_stderr_redirection_to_existing_files(self):
+        """Exercise SBLaunchInfo::AddOpenFileAction() for STDOUT and STDERR, without redirecting STDIN, when the output files already exist."""
+        self.setup_test()
+        self.build()
+        self.create_target()
+        self.write_file_with_placeholder(self.output_file)
+        self.write_file_with_placeholder(self.error_file)
+        self.redirect_stdout()
+        self.redirect_stderr()
+        self.run_process(True)
+        output = self.read_output_file_and_delete()
+        error = self.read_error_file_and_delete()
+        self.check_process_output(output, error)
+    def write_file_with_placeholder(self, target_file):
+        """Pre-populate target_file with placeholder text (via 'platform file write' when remote)."""
+        placeholder = "This content should be overwritten."
+        if lldb.remote_platform:
+            self.runCmd(
+                'platform file write "{target}" -d "{data}"'.format(
+                    target=target_file, data=placeholder
+                )
+            )
+        else:
+            with open(target_file, "w") as f:
+                f.write(placeholder)
+
     # target_file - path on local file system or remote file system if running remote
     # local_file - path on local system
     def read_file_and_delete(self, target_file, local_file):
diff --git a/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp b/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp
index 60d4c3bc293a3c6..97908b4acaf284f 100644
--- a/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp
+++ b/lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp
@@ -208,7 +208,8 @@ std::vector<std::string> MachVMRegion::GetMemoryTypes() const {
       m_data.user_tag == VM_MEMORY_MALLOC_LARGE_REUSABLE ||
       m_data.user_tag == VM_MEMORY_MALLOC_HUGE ||
       m_data.user_tag == VM_MEMORY_REALLOC ||
-      m_data.user_tag == VM_MEMORY_SBRK) {
+      m_data.user_tag == VM_MEMORY_SBRK ||
+      m_data.user_tag == VM_MEMORY_SANITIZER) {
     types.push_back("heap");
     if (m_data.user_tag == VM_MEMORY_MALLOC_TINY) {
       types.push_back("malloc-tiny");
diff --git a/lldb/tools/lldb-dap/Breakpoint.cpp b/lldb/tools/lldb-dap/Breakpoint.cpp
index 0c33d4b114d7602..9ea7a42ca85a1ef 100644
--- a/lldb/tools/lldb-dap/Breakpoint.cpp
+++ b/lldb/tools/lldb-dap/Breakpoint.cpp
@@ -9,6 +9,7 @@
 #include "Breakpoint.h"
 #include "DAP.h"
 #include "JSONUtils.h"
+#include "lldb/API/SBBreakpointLocation.h"
 #include "llvm/ADT/StringExtras.h"
 
 using namespace lldb_dap;
diff --git a/lldb/tools/lldb-dap/Breakpoint.h b/lldb/tools/lldb-dap/Breakpoint.h
index 47a9d9c59ae2b76..ee9d3736d6190fb 100644
--- a/lldb/tools/lldb-dap/Breakpoint.h
+++ b/lldb/tools/lldb-dap/Breakpoint.h
@@ -10,6 +10,7 @@
 #define LLDB_TOOLS_LLDB_DAP_BREAKPOINT_H
 
 #include "BreakpointBase.h"
+#include "lldb/API/SBBreakpoint.h"
 
 namespace lldb_dap {
 
diff --git a/lldb/tools/lldb-dap/BreakpointBase.cpp b/lldb/tools/lldb-dap/BreakpointBase.cpp
index 519729f5519ffcd..f3cb06a3562d485 100644
--- a/lldb/tools/lldb-dap/BreakpointBase.cpp
+++ b/lldb/tools/lldb-dap/BreakpointBase.cpp
@@ -7,8 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "BreakpointBase.h"
-#include "DAP.h"
-#include "llvm/ADT/StringExtras.h"
+#include "JSONUtils.h"
 
 using namespace lldb_dap;
 
diff --git a/lldb/tools/lldb-dap/BreakpointBase.h b/lldb/tools/lldb-dap/BreakpointBase.h
index 5a04bb201615fc8..79301480e0e5888 100644
--- a/lldb/tools/lldb-dap/BreakpointBase.h
+++ b/lldb/tools/lldb-dap/BreakpointBase.h
@@ -9,10 +9,8 @@
 #ifndef LLDB_TOOLS_LLDB_DAP_BREAKPOINTBASE_H
 #define LLDB_TOOLS_LLDB_DAP_BREAKPOINTBASE_H
 
-#include "lldb/API/SBBreakpoint.h"
 #include "llvm/Support/JSON.h"
 #include <string>
-#include <vector>
 
 namespace lldb_dap {
 
diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp
index 68559e382006db8..283392270ba26c9 100644
--- a/lldb/tools/lldb-dap/DAP.cpp
+++ b/lldb/tools/lldb-dap/DAP.cpp
@@ -10,11 +10,14 @@
 #include <cstdarg>
 #include <fstream>
 #include <mutex>
-#include <sstream>
 
 #include "DAP.h"
+#include "JSONUtils.h"
 #include "LLDBUtils.h"
 #include "lldb/API/SBCommandInterpreter.h"
+#include "lldb/API/SBLanguageRuntime.h"
+#include "lldb/API/SBListener.h"
+#include "lldb/API/SBStream.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/FormatVariadic.h"
 
diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h
index acc10ade75fd147..dab4ce44ab202cb 100644
--- a/lldb/tools/lldb-dap/DAP.h
+++ b/lldb/tools/lldb-dap/DAP.h
@@ -9,16 +9,10 @@
 #ifndef LLDB_TOOLS_LLDB_DAP_DAP_H
 #define LLDB_TOOLS_LLDB_DAP_DAP_H
 
-#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
-
-#include <atomic>
-#include <condition_variable>
 #include <cstdio>
-#include <future>
 #include <iosfwd>
 #include <map>
 #include <optional>
-#include <set>
 #include <thread>
 
 #include "llvm/ADT/DenseMap.h"
@@ -30,24 +24,12 @@
 #include "llvm/Support/raw_ostream.h"
 
 #include "lldb/API/SBAttachInfo.h"
-#include "lldb/API/SBBreakpoint.h"
-#include "lldb/API/SBBreakpointLocation.h"
 #include "lldb/API/SBCommandInterpreter.h"
 #include "lldb/API/SBCommandReturnObject.h"
-#include "lldb/API/SBCommunication.h"
 #include "lldb/API/SBDebugger.h"
 #include "lldb/API/SBEvent.h"
 #include "lldb/API/SBFormat.h"
-#include "lldb/API/SBHostOS.h"
-#include "lldb/API/SBInstruction.h"
-#include "lldb/API/SBInstructionList.h"
-#include "lldb/API/SBLanguageRuntime.h"
 #include "lldb/API/SBLaunchInfo.h"
-#include "lldb/API/SBLineEntry.h"
-#include "lldb/API/SBListener.h"
-#include "lldb/API/SBProcess.h"
-#include "lldb/API/SBStream.h"
-#include "lldb/API/SBStringList.h"
 #include "lldb/API/SBTarget.h"
 #include "lldb/API/SBThread.h"
 
@@ -56,7 +38,6 @@
 #include "IOStream.h"
 #include "InstructionBreakpoint.h"
 #include "ProgressEvent.h"
-#include "RunInTerminal.h"
 #include "SourceBreakpoint.h"
 
 #define VARREF_LOCALS (int64_t)1
diff --git a/lldb/tools/lldb-dap/FifoFiles.cpp b/lldb/tools/lldb-dap/FifoFiles.cpp
index 9a6423f79471a40..1f1bba80bd3b113 100644
--- a/lldb/tools/lldb-dap/FifoFiles.cpp
+++ b/lldb/tools/lldb-dap/FifoFiles.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "FifoFiles.h"
+#include "JSONUtils.h"
 
 #if !defined(_WIN32)
 #include <sys/stat.h>
@@ -18,11 +19,6 @@
 #include <fstream>
 #include <future>
 #include <optional>
-#include <thread>
-
-#include "llvm/Support/FileSystem.h"
-
-#include "lldb/lldb-defines.h"
 
 using namespace llvm;
 
diff --git a/lldb/tools/lldb-dap/FifoFiles.h b/lldb/tools/lldb-dap/FifoFiles.h
index 02a97cd5cbbd23c..633ebeb2aedd45b 100644
--- a/lldb/tools/lldb-dap/FifoFiles.h
+++ b/lldb/tools/lldb-dap/FifoFiles.h
@@ -9,10 +9,8 @@
 #ifndef LLDB_TOOLS_LLDB_DAP_FIFOFILES_H
 #define LLDB_TOOLS_LLDB_DAP_FIFOFILES_H
 
-#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
 #include "llvm/Support/Error.h"
-
-#include "JSONUtils.h"
+#include "llvm/Support/JSON.h"
 
 #include <chrono>
 
diff --git a/lldb/tools/lldb-dap/FunctionBreakpoint.cpp b/lldb/tools/lldb-dap/FunctionBreakpoint.cpp
index 21743bf908706d0..216c685f633da80 100644
--- a/lldb/tools/lldb-dap/FunctionBreakpoint.cpp
+++ b/lldb/tools/lldb-dap/FunctionBreakpoint.cpp
@@ -8,6 +8,7 @@
 
 #include "FunctionBreakpoint.h"
 #include "DAP.h"
+#include "JSONUtils.h"
 
 namespace lldb_dap {
 
diff --git a/lldb/tools/lldb-dap/IOStream.cpp b/lldb/tools/lldb-dap/IOStream.cpp
index 96e9a1ed49532f8..d2e8ec40b0a7b85 100644
--- a/lldb/tools/lldb-dap/IOStream.cpp
+++ b/lldb/tools/lldb-dap/IOStream.cpp
@@ -18,7 +18,6 @@
 
 #include <fstream>
 #include <string>
-#include <vector>
 
 using namespace lldb_dap;
 
diff --git a/lldb/tools/lldb-dap/IOStream.h b/lldb/tools/lldb-dap/IOStream.h
index b62502419182cd3..57d5fd458b7165d 100644
--- a/lldb/tools/lldb-dap/IOStream.h
+++ b/lldb/tools/lldb-dap/IOStream.h
@@ -9,8 +9,6 @@
 #ifndef LLDB_TOOLS_LLDB_DAP_IOSTREAM_H
 #define LLDB_TOOLS_LLDB_DAP_IOSTREAM_H
 
-#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
-
 #if defined(_WIN32)
 // We need to #define NOMINMAX in order to skip `min()` and `max()` macro
 // definitions that conflict with other system headers.
diff --git a/lldb/tools/lldb-dap/InstructionBreakpoint.cpp b/lldb/tools/lldb-dap/InstructionBreakpoint.cpp
index de4f6f5d86717f6..e3a8460bb7b3014 100644
--- a/lldb/tools/lldb-dap/InstructionBreakpoint.cpp
+++ b/lldb/tools/lldb-dap/InstructionBreakpoint.cpp
@@ -9,6 +9,7 @@
 
 #include "InstructionBreakpoint.h"
 #include "DAP.h"
+#include "JSONUtils.h"
 
 namespace lldb_dap {
 
diff --git a/lldb/tools/lldb-dap/InstructionBreakpoint.h b/lldb/tools/lldb-dap/InstructionBreakpoint.h
index cf1516f46e9551f..53912af46ca1480 100644
--- a/lldb/tools/lldb-dap/InstructionBreakpoint.h
+++ b/lldb/tools/lldb-dap/InstructionBreakpoint.h
@@ -11,7 +11,6 @@
 #define LLDB_TOOLS_LLDB_DAP_INSTRUCTIONBREAKPOINT_H
 
 #include "Breakpoint.h"
-#include "llvm/ADT/StringRef.h"
 
 namespace lldb_dap {
 
diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp
index e42a6d9d6998045..97fe6b4f9f05db7 100644
--- a/lldb/tools/lldb-dap/JSONUtils.cpp
+++ b/lldb/tools/lldb-dap/JSONUtils.cpp
@@ -6,21 +6,18 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <algorithm>
 #include <iomanip>
 #include <optional>
 #include <sstream>
 #include <string.h>
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/FormatAdapters.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/ScopedPrinter.h"
 
-#include "lldb/API/SBBreakpoint.h"
-#include "lldb/API/SBBreakpointLocation.h"
 #include "lldb/API/SBDeclaration.h"
+#include "lldb/API/SBStream.h"
 #include "lldb/API/SBStringList.h"
 #include "lldb/API/SBStructuredData.h"
 #include "lldb/API/SBValue.h"
diff --git a/lldb/tools/lldb-dap/LLDBUtils.cpp b/lldb/tools/lldb-dap/LLDBUtils.cpp
index b38833c0fdb6b6f..2ffcba7dff4f248 100644
--- a/lldb/tools/lldb-dap/LLDBUtils.cpp
+++ b/lldb/tools/lldb-dap/LLDBUtils.cpp
@@ -8,6 +8,8 @@
 
 #include "LLDBUtils.h"
 #include "DAP.h"
+#include "JSONUtils.h"
+#include "lldb/API/SBStringList.h"
 
 #include <mutex>
 
diff --git a/lldb/tools/lldb-dap/OutputRedirector.h b/lldb/tools/lldb-dap/OutputRedirector.h
index dba51016775bf45..e26d1648b104f9d 100644
--- a/lldb/tools/lldb-dap/OutputRedirector.h
+++ b/lldb/tools/lldb-dap/OutputRedirector.h
@@ -9,8 +9,6 @@
 #ifndef LLDB_TOOLS_LLDB_DAP_OUTPUT_REDIRECTOR_H
 #define LLDB_TOOLS_LLDB_DAP_OUTPUT_REDIRECTOR_H
 
-#include <thread>
-
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
 
diff --git a/lldb/tools/lldb-dap/ProgressEvent.cpp b/lldb/tools/lldb-dap/ProgressEvent.cpp
index 8a660b50af1205b..0dcc2ee81001d50 100644
--- a/lldb/tools/lldb-dap/ProgressEvent.cpp
+++ b/lldb/tools/lldb-dap/ProgressEvent.cpp
@@ -110,7 +110,6 @@ json::Value ProgressEvent::ToJSON() const {
   std::string progress_id_str;
   llvm::raw_string_ostream progress_id_strm(progress_id_str);
   progress_id_strm << m_progress_id;
-  progress_id_strm.flush();
   body.try_emplace("progressId", progress_id_str);
 
   if (m_event_type == progressStart) {
diff --git a/lldb/tools/lldb-dap/RunInTerminal.cpp b/lldb/tools/lldb-dap/RunInTerminal.cpp
index ad019b8a56a4fa6..4fe09e2885a8e5e 100644
--- a/lldb/tools/lldb-dap/RunInTerminal.cpp
+++ b/lldb/tools/lldb-dap/RunInTerminal.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "RunInTerminal.h"
+#include "JSONUtils.h"
 
 #if !defined(_WIN32)
 #include <sys/stat.h>
@@ -15,14 +16,10 @@
 #endif
 
 #include <chrono>
-#include <fstream>
 #include <future>
-#include <thread>
 
 #include "llvm/Support/FileSystem.h"
 
-#include "lldb/lldb-defines.h"
-
 using namespace llvm;
 
 namespace lldb_dap {
diff --git a/lldb/tools/lldb-dap/RunInTerminal.h b/lldb/tools/lldb-dap/RunInTerminal.h
index 2fbe3acbb408427..b20f8beb6071dd9 100644
--- a/lldb/tools/lldb-dap/RunInTerminal.h
+++ b/lldb/tools/lldb-dap/RunInTerminal.h
@@ -10,9 +10,11 @@
 #define LLDB_TOOLS_LLDB_DAP_RUNINTERMINAL_H
 
 #include "FifoFiles.h"
+#include "lldb/API/SBError.h"
 
 #include <future>
-#include <thread>
+#include <memory>
+#include <string>
 
 namespace lldb_dap {
 
diff --git a/lldb/tools/lldb-dap/SourceBreakpoint.cpp b/lldb/tools/lldb-dap/SourceBreakpoint.cpp
index f5dd1346cb9e543..d1a3a5bedb0ae29 100644
--- a/lldb/tools/lldb-dap/SourceBreakpoint.cpp
+++ b/lldb/tools/lldb-dap/SourceBreakpoint.cpp
@@ -8,6 +8,7 @@
 
 #include "SourceBreakpoint.h"
 #include "DAP.h"
+#include "JSONUtils.h"
 
 namespace lldb_dap {
 
diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp
index f70b0d3d4cbee07..a2f7be2b214e4ae 100644
--- a/lldb/tools/lldb-dap/lldb-dap.cpp
+++ b/lldb/tools/lldb-dap/lldb-dap.cpp
@@ -7,9 +7,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "DAP.h"
+#include "FifoFiles.h"
+#include "RunInTerminal.h"
 #include "Watchpoint.h"
 #include "lldb/API/SBDeclaration.h"
+#include "lldb/API/SBInstruction.h"
+#include "lldb/API/SBListener.h"
 #include "lldb/API/SBMemoryRegionInfo.h"
+#include "lldb/API/SBStringList.h"
 #include "llvm/Support/Base64.h"
 
 #include <cassert>
@@ -43,17 +48,12 @@
 
 #include <algorithm>
 #include <array>
-#include <chrono>
-#include <fstream>
 #include <map>
 #include <memory>
-#include <mutex>
 #include <set>
-#include <sstream>
 #include <thread>
 #include <vector>
 
-#include "lldb/API/SBEnvironment.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Host/Config.h"
 #include "llvm/ADT/ArrayRef.h"
diff --git a/lldb/unittests/Expression/DWARFExpressionTest.cpp b/lldb/unittests/Expression/DWARFExpressionTest.cpp
index f9e0605fce29d61..fdc9bfae1876c51 100644
--- a/lldb/unittests/Expression/DWARFExpressionTest.cpp
+++ b/lldb/unittests/Expression/DWARFExpressionTest.cpp
@@ -181,6 +181,9 @@ TEST(DWARFExpression, DW_OP_bra) {
       }),
       // clang-format on
       llvm::HasValue(0x42));
+
+  EXPECT_THAT_ERROR(Evaluate({DW_OP_bra, 0x01, 0x00}).takeError(),
+                    llvm::Failed());
 }
 
 TEST(DWARFExpression, DW_OP_convert) {
diff --git a/lldb/unittests/Host/FileActionTest.cpp b/lldb/unittests/Host/FileActionTest.cpp
index b208169aac20e60..56227cd587e5bbe 100644
--- a/lldb/unittests/Host/FileActionTest.cpp
+++ b/lldb/unittests/Host/FileActionTest.cpp
@@ -6,8 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <fcntl.h>
+
 #include "lldb/Host/FileAction.h"
 #include "gtest/gtest.h"
+#if defined(_WIN32)
+#include "lldb/Host/windows/PosixApi.h"
+#endif
 
 using namespace lldb_private;
 
@@ -17,3 +22,26 @@ TEST(FileActionTest, Open) {
   EXPECT_EQ(Action.GetAction(), FileAction::eFileActionOpen);
   EXPECT_EQ(Action.GetFileSpec(), FileSpec("/tmp"));
 }
+
+TEST(FileActionTest, OpenReadWrite) {
+  FileAction Action;
+  Action.Open(48, FileSpec("/tmp_0"), /*read*/ true, /*write*/ true);
+  // Require every flag at once; `& O_RDONLY` is vacuous when O_RDONLY is 0.
+  const int Flags = O_NOCTTY | O_CREAT | O_RDWR;
+  EXPECT_EQ(Action.GetActionArgument() & (Flags | O_WRONLY), Flags);
+}
+
+TEST(FileActionTest, OpenReadOnly) {
+  FileAction Action;
+  Action.Open(49, FileSpec("/tmp_1"), /*read*/ true, /*write*/ false);
+  const int Mask = O_NOCTTY | O_RDONLY | O_WRONLY | O_RDWR; // O_RDONLY may be 0
+  EXPECT_EQ(Action.GetActionArgument() & Mask, O_NOCTTY | O_RDONLY);
+}
+
+TEST(FileActionTest, OpenWriteOnly) {
+  FileAction Action;
+  Action.Open(50, FileSpec("/tmp_2"), /*read*/ false, /*write*/ true);
+  // Require every flag at once; `& O_RDONLY` is vacuous when O_RDONLY is 0.
+  const int Flags = O_NOCTTY | O_CREAT | O_WRONLY | O_TRUNC;
+  EXPECT_EQ(Action.GetActionArgument() & (Flags | O_RDWR), Flags);
+}
diff --git a/llvm/docs/CommandGuide/lit.rst b/llvm/docs/CommandGuide/lit.rst
index bd1cfbbade511f0..af8a1a08be535dd 100644
--- a/llvm/docs/CommandGuide/lit.rst
+++ b/llvm/docs/CommandGuide/lit.rst
@@ -57,7 +57,11 @@ GENERAL OPTIONS
 
 .. option:: -h, --help
 
- Show the :program:`lit` help message.
+ Show the :program:`lit` help message and exit.
+
+.. option:: --version
+
+ Show :program:`lit`'s version number and exit.
 
 .. option:: -j N, --workers=N
 
@@ -108,23 +112,51 @@ OUTPUT OPTIONS
 
  Enable -v, but for all tests not just failed tests.
 
+.. option:: -o PATH, --output PATH
+
+ Write test results to the provided path.
+
 .. option:: --no-progress-bar
 
  Do not use curses based progress bar.
 
+.. option:: --show-excluded
+
+ Show excluded tests.
+
+.. option:: --show-skipped
+
+ Show skipped tests.
+
 .. option:: --show-unsupported
 
- Show the names of unsupported tests.
+ Show unsupported tests.
+
+.. option:: --show-pass
+
+ Show passed tests.
+
+.. option:: --show-flakypass
+
+ Show tests that passed only after being retried.
 
 .. option:: --show-xfail
 
- Show the names of tests that were expected to fail.
+ Show expectedly failed tests.
 
 .. _execution-options:
 
 EXECUTION OPTIONS
 -----------------
 
+.. option:: --gtest-sharding
+
+ Enable sharding for GoogleTest format.
+
+.. option:: --no-gtest-sharding
+
+ Disable sharding for GoogleTest format.
+
 .. option:: --path=PATH
 
  Specify an additional ``PATH`` to use when searching for executables in tests.
@@ -139,11 +171,6 @@ EXECUTION OPTIONS
  "``valgrind``" feature that can be used to conditionally disable (or expect
  failure in) certain tests.
 
-.. option:: --vg-arg=ARG
-
- When :option:`--vg` is used, specify an additional argument to pass to
- :program:`valgrind` itself.
-
 .. option:: --vg-leak
 
  When :option:`--vg` is used, enable memory leak checks.  When this option is
@@ -151,9 +178,59 @@ EXECUTION OPTIONS
  feature that can be used to conditionally disable (or expect failure in)
  certain tests.
 
+.. option:: --vg-arg=ARG
+
+ When :option:`--vg` is used, specify an additional argument to pass to
+ :program:`valgrind` itself.
+
+.. option:: --no-execute
+
+ Don't execute any tests (assume that they pass).
+
+.. option:: --xunit-xml-output XUNIT_XML_OUTPUT
+
+ Write XUnit-compatible XML test reports to the specified file.
+
+.. option:: --resultdb-output RESULTDB_OUTPUT
+
+ Write LuCI ResultDB compatible JSON to the specified file.
+
+.. option:: --time-trace-output TIME_TRACE_OUTPUT
+
+ Write Chrome tracing compatible JSON to the specified file.
+
+.. option:: --timeout MAXINDIVIDUALTESTTIME
+
+ Maximum time to spend running a single test (in seconds). 0 means no time
+ limit. [Default: 0]
+
+.. option:: --timeout=N
+
+ Spend at most ``N`` seconds (approximately) running each individual test.
+ ``0`` means no time limit, and ``0`` is the default. Note that this is not an
+ alias for :option:`--max-time`; the two are different kinds of maximums.
+
+.. option:: --max-failures MAX_FAILURES
+
+ Stop execution after the given number of failures.
+
+.. option:: --allow-empty-runs
+
+ Do not fail the run if all tests are filtered out.
+
+.. option:: --per-test-coverage
+
+ Emit the necessary test coverage data, divided per test case (involves
+ setting a unique value to LLVM_PROFILE_FILE for each RUN). The coverage
+ data files will be emitted in the directory specified by ``config.test_exec_root``.
+
+.. option:: --ignore-fail
+
+ Exit with status zero even if some tests fail.
+
 .. option:: --skip-test-time-recording
 
- Disable tracking the wall time individual tests take to execute.
+ Do not track elapsed wall time for each test.
 
 .. option:: --time-tests
 
@@ -161,10 +238,6 @@ EXECUTION OPTIONS
  in the summary output.  This is useful for determining which tests in a test
  suite take the most time to execute.
 
-.. option:: --ignore-fail
-
- Exit with status zero even if some tests fail.
-
 .. _selection-options:
 
 SELECTION OPTIONS
@@ -178,23 +251,6 @@ The timing data is stored in the `test_exec_root` in a file named
 `.lit_test_times.txt`. If this file does not exist, then `lit` checks the
 `test_source_root` for the file to optionally accelerate clean builds.
 
-.. option:: --shuffle
-
- Run the tests in a random order, not failing/slowest first. Deprecated,
- use :option:`--order` instead.
-
-.. option:: --per-test-coverage
-
- Emit the necessary test coverage data, divided per test case (involves
- setting a unique value to LLVM_PROFILE_FILE for each RUN). The coverage
- data files will be emitted in the directory specified by `config.test_exec_root`.
-
-.. option:: --max-failures N
-
- Stop execution after the given number ``N`` of failures.
- An integer argument should be passed on the command line
- prior to execution.
-
 .. option:: --max-tests=N
 
  Run at most ``N`` tests and then terminate.
@@ -205,16 +261,6 @@ The timing data is stored in the `test_exec_root` in a file named
  Note that this is not an alias for :option:`--timeout`; the two are
  different kinds of maximums.
 
-.. option:: --num-shards=M
-
- Divide the set of selected tests into ``M`` equal-sized subsets or
- "shards", and run only one of them.  Must be used with the
- ``--run-shard=N`` option, which selects the shard to run. The environment
- variable ``LIT_NUM_SHARDS`` can also be used in place of this
- option. These two options provide a coarse mechanism for partitioning large
- testsuites, for parallel execution on separate machines (say in a large
- testing farm).
-
 .. option:: --order={lexical,random,smart}
 
  Define the order in which tests are run. The supported values are:
@@ -228,18 +274,14 @@ The timing data is stored in the `test_exec_root` in a file named
    tests, all in descending execution time order. This is the default as it
    optimizes concurrency.
 
-.. option:: --run-shard=N
+.. option:: --shuffle
 
- Select which shard to run, assuming the ``--num-shards=M`` option was
- provided. The two options must be used together, and the value of ``N``
- must be in the range ``1..M``. The environment variable
- ``LIT_RUN_SHARD`` can also be used in place of this option.
+ Run the tests in a random order, not failing/slowest first. Deprecated,
+ use :option:`--order` instead.
 
-.. option:: --timeout=N
+.. option:: -i, --incremental
 
- Spend at most ``N`` seconds (approximately) running each individual test.
- ``0`` means no time limit, and ``0`` is the default. Note that this is not an
- alias for :option:`--max-time`; the two are different kinds of maximums.
+ Run failed tests first (DEPRECATED: use ``--order=smart``).
 
 .. option:: --filter=REGEXP
 
@@ -297,6 +339,23 @@ The timing data is stored in the `test_exec_root` in a file named
   primary purpose is to suppress an ``XPASS`` result without modifying a test
   case that uses the ``XFAIL`` directive.
 
+.. option:: --num-shards=M
+
+ Divide the set of selected tests into ``M`` equal-sized subsets or
+ "shards", and run only one of them.  Must be used with the
+ ``--run-shard=N`` option, which selects the shard to run. The environment
+ variable ``LIT_NUM_SHARDS`` can also be used in place of this
+ option. These two options provide a coarse mechanism for partitioning large
+ testsuites, for parallel execution on separate machines (say in a large
+ testing farm).
+
+.. option:: --run-shard=N
+
+ Select which shard to run, assuming the ``--num-shards=M`` option was
+ provided. The two options must be used together, and the value of ``N``
+ must be in the range ``1..M``. The environment variable
+ ``LIT_RUN_SHARD`` can also be used in place of this option.
+
 ADDITIONAL OPTIONS
 ------------------
 
@@ -313,6 +372,11 @@ ADDITIONAL OPTIONS
 
  List all of the discovered tests and exit.
 
+.. option:: --show-used-features
+
+ Show all features used in the test suite (in ``XFAIL``, ``UNSUPPORTED`` and
+ ``REQUIRES``) and exit.
+
 EXIT STATUS
 -----------
 
diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst
index 1c4e00b956bc4f8..8920530dc3f1a13 100644
--- a/llvm/docs/GlobalISel/GenericOpcode.rst
+++ b/llvm/docs/GlobalISel/GenericOpcode.rst
@@ -633,8 +633,8 @@ G_FCEIL, G_FSQRT, G_FFLOOR, G_FRINT, G_FNEARBYINT
 
 These correspond to the standard C functions of the same name.
 
-G_FCOS, G_FSIN, G_FTAN, G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH, G_FTANH
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+G_FCOS, G_FSIN, G_FSINCOS, G_FTAN, G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH, G_FSINH, G_FTANH
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 These correspond to the standard C trigonometry functions of the same name.
 
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index b83675c6ed97aa8..177924dca4d1782 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -2334,7 +2334,7 @@ example:
     This attribute indicates that RealtimeSanitizer checks
     (realtime safety analysis - no allocations, syscalls or exceptions) are enabled
     for this function.
-``sanitize_realtime_unsafe``
+``sanitize_realtime_blocking``
     This attribute indicates that RealtimeSanitizer should error immediately
     if the attributed function is called during invocation of a function
     attributed with ``sanitize_realtime``.
@@ -15512,6 +15512,8 @@ Semantics:
 This function returns the first value raised to the second power with an
 unspecified sequence of rounding operations.
 
+.. _t_llvm_sin:
+
 '``llvm.sin.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -15549,6 +15551,8 @@ trapping or setting ``errno``.
 When specified with the fast-math-flag 'afn', the result may be approximated
 using a less accurate calculation.
 
+.. _t_llvm_cos:
+
 '``llvm.cos.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -15882,6 +15886,50 @@ trapping or setting ``errno``.
 When specified with the fast-math-flag 'afn', the result may be approximated
 using a less accurate calculation.
 
+
+'``llvm.sincos.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.sincos`` on any
+floating-point or vector of floating-point type. Not all targets support
+all types however.
+
+::
+
+      declare { float, float }          @llvm.sincos.f32(float  %Val)
+      declare { double, double }        @llvm.sincos.f64(double %Val)
+      declare { x86_fp80, x86_fp80 }    @llvm.sincos.f80(x86_fp80  %Val)
+      declare { fp128, fp128 }          @llvm.sincos.f128(fp128 %Val)
+      declare { ppc_fp128, ppc_fp128 }  @llvm.sincos.ppcf128(ppc_fp128  %Val)
+      declare { <4 x float>, <4 x float> } @llvm.sincos.v4f32(<4 x float>  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.sincos.*``' intrinsics return the sine and cosine of the operand.
+
+Arguments:
+""""""""""
+
+The argument is a :ref:`floating-point <t_floating>` value or
+:ref:`vector <t_vector>` of floating-point values. Returns two values matching
+the argument type in a struct.
+
+Semantics:
+""""""""""
+
+This intrinsic is equivalent to calling both :ref:`llvm.sin <t_llvm_sin>`
+and :ref:`llvm.cos <t_llvm_cos>` on the argument.
+
+The first result is the sine of the argument and the second result is the cosine
+of the argument.
+
+When specified with the fast-math-flag 'afn', the result may be approximated
+using a less accurate calculation.
+
 '``llvm.pow.*``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst
index 2135ed3b1114180..f6f2eb45c49c17f 100644
--- a/llvm/docs/RISCVUsage.rst
+++ b/llvm/docs/RISCVUsage.rst
@@ -82,6 +82,10 @@ ISA naming string. Currently supported profiles:
 * ``rva20s64``
 * ``rva22u64``
 * ``rva22s64``
+* ``rva23u64``
+* ``rva23s64``
+* ``rvb23u64``
+* ``rvb23s64``
 
 Note that you can also append additional extension names to be enabled, e.g.
 ``rva20u64_zicond`` will enable the ``zicond`` extension in addition to those
@@ -91,10 +95,6 @@ Profiles that are not yet ratified cannot be used unless
 ``-menable-experimental-extensions`` (or equivalent for other tools) is
 specified. This applies to the following profiles:
 
-* ``rva23u64``
-* ``rva23s64``
-* ``rvb23u64``
-* ``rvb23s64``
 * ``rvm23u32``
 
 .. _riscv-extensions:
@@ -119,6 +119,7 @@ on support follow.
      ``E``             Supported (`See note <#riscv-rve-note>`__)
      ``H``             Assembly Support
      ``M``             Supported
+     ``Sha``           Supported
      ``Shcounterenw``  Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
      ``Shgatpa``       Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
      ``Shtvala``       Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
@@ -129,6 +130,8 @@ on support follow.
      ``Smcdeleg``      Supported
      ``Smcsrind``      Supported
      ``Smepmp``        Supported
+     ``Smmpm``         Supported
+     ``Smnpm``         Supported
      ``Smrnmi``        Assembly Support
      ``Smstateen``     Assembly Support
      ``Ssaia``         Supported
@@ -137,6 +140,8 @@ on support follow.
      ``Sscofpmf``      Assembly Support
      ``Sscounterenw``  Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
      ``Sscsrind``      Supported
+     ``Ssnpm``         Supported
+     ``Sspm``          Supported
      ``Ssqosid``       Assembly Support
      ``Ssstateen``     Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
      ``Ssstrict``      Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
@@ -144,12 +149,14 @@ on support follow.
      ``Sstvala``       Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
      ``Sstvecd``       Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
      ``Ssu64xl``       Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
+     ``Supm``          Supported
      ``Svade``         Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
      ``Svadu``         Assembly Support
      ``Svbare``        Assembly Support (`See note <#riscv-profiles-extensions-note>`__)
      ``Svinval``       Assembly Support
      ``Svnapot``       Assembly Support
      ``Svpbmt``        Supported
+     ``Svvptc``        Supported
      ``V``             Supported
      ``Za128rs``       Supported (`See note <#riscv-profiles-extensions-note>`__)
      ``Za64rs``        Supported (`See note <#riscv-profiles-extensions-note>`__)
@@ -308,9 +315,6 @@ LLVM supports (to various degrees) a number of experimental extensions.  All exp
 
 The primary goal of experimental support is to assist in the process of ratification by providing an existence proof of an implementation, and simplifying efforts to validate the value of a proposed extension against large code bases.  Experimental extensions are expected to either transition to ratified status, or be eventually removed.  The decision on whether to accept an experimental extension is currently done on an entirely case by case basis; if you want to propose one, attending the bi-weekly RISC-V sync-up call is strongly advised.
 
-``experimental-ssnpm``, ``experimental-smnpm``, ``experimental-smmpm``, ``experimental-sspm``, ``experimental-supm``
-  LLVM implements the `v1.0.0-rc2 specification <https://github.com/riscv/riscv-j-extension/releases/tag/pointer-masking-v1.0.0-rc2>`__.
-
 ``experimental-zalasr``
   LLVM implements the `0.0.5 draft specification <https://github.com/mehnadnerd/riscv-zalasr>`__.
 
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 706546980cf6718..d5c650e74eeb28e 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -88,7 +88,14 @@ Changes to the LLVM IR
   * `llvm.nvvm.ptr.shared.to.gen`
   * `llvm.nvvm.ptr.constant.to.gen`
   * `llvm.nvvm.ptr.local.to.gen`
-  
+
+* Remove the following intrinsics which can be replaced with a load from
+  addrspace(1) with `!invariant.load` metadata:
+
+  * `llvm.nvvm.ldg.global.i`
+  * `llvm.nvvm.ldg.global.f`
+  * `llvm.nvvm.ldg.global.p`
+
 * Operand bundle values can now be metadata strings.
 
 Changes to LLVM infrastructure
@@ -110,6 +117,9 @@ Changes to the AArch64 Backend
   the required alignment space with a sequence of `0x0` bytes (the requested
   fill value) rather than NOPs.
 
+* Assembler/disassembler support has been added for Armv9.6-A (2024)
+  architecture extensions.
+
 Changes to the AMDGPU Backend
 -----------------------------
 
@@ -168,13 +178,27 @@ Changes to the RISC-V Backend
   means Zve32x and Zve32f will also require Zvl64b. The prior support was
   largely untested.
 * The `Zvbc32e` and `Zvkgs` extensions are now supported experimentally.
-* Added `Smctr` and `Ssctr` extensions.
+* Added `Smctr`, `Ssctr` and `Svvptc` extensions.
 * `-mcpu=syntacore-scr7` was added.
 * The `Zacas` extension is no longer marked as experimental.
+* The `Smmpm`, `Smnpm`, `Ssnpm`, `Supm`, and `Sspm` pointer masking extensions
+  are no longer marked as experimental.
+* The `Sha` extension is now supported.
+* The RVA23U64, RVA23S64, RVB23U64, and RVB23S64 profiles are no longer marked
+  as experimental.
 
 Changes to the WebAssembly Backend
 ----------------------------------
 
+The default target CPU, "generic", now enables the `-mnontrapping-fptoint`
+and `-mbulk-memory` flags, which correspond to the [Bulk Memory Operations]
+and [Non-trapping float-to-int Conversions] language features, which are
+[widely implemented in engines].
+
+[Bulk Memory Operations]: https://github.com/WebAssembly/bulk-memory-operations/blob/master/proposals/bulk-memory-operations/Overview.md
+[Non-trapping float-to-int Conversions]: https://github.com/WebAssembly/spec/blob/master/proposals/nontrapping-float-to-int-conversion/Overview.md
+[widely implemented in engines]: https://webassembly.org/features/
+
 Changes to the Windows Target
 -----------------------------
 
@@ -198,6 +222,10 @@ Changes to the X86 Backend
 
 * Supported instructions of `MOVRS AND AVX10.2`
 
+* Supported ISA of `SM4(EVEX)`.
+
+* Supported ISA of `MSR_IMM`.
+
 Changes to the OCaml bindings
 -----------------------------
 
@@ -273,6 +301,8 @@ Changes to LLDB
 * LLDB can now read the `fpmr` register from AArch64 Linux processes and core
   files.
 
+* Program stdout/stderr redirection now opens the file with the `O_TRUNC` flag, so the file is truncated if the path already exists.
+  * e.g. `settings set target.output-path/target.error-path <path/to/file>`
 
 Changes to BOLT
 ---------------------------------
diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst
index 86101ffbd9ca5dd..0b204d512876a37 100644
--- a/llvm/docs/UserGuides.rst
+++ b/llvm/docs/UserGuides.rst
@@ -286,7 +286,7 @@ Additional Topics
    DirectX runtime.
 
 :doc:`RISCVUsage`
-   This document describes using the RISCV-V target.
+   This document describes using the RISC-V target.
 
 :doc:`RISCV/RISCVVectorExtension`
    This document describes how the RISC-V Vector extension can be expressed in LLVM IR and how code is generated for it in the backend.
diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h
index 6d8891e70577222..f7d81636f4dd4ea 100644
--- a/llvm/include/llvm-c/DebugInfo.h
+++ b/llvm/include/llvm-c/DebugInfo.h
@@ -1415,6 +1415,52 @@ LLVMMetadataRef LLVMInstructionGetDebugLoc(LLVMValueRef Inst);
  */
 void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc);
 
+/**
+ * Create a new descriptor for a label
+ *
+ * \param Builder         The DIBuilder.
+ * \param Context         The scope to create the label in.
+ * \param Name            Label name.
+ * \param NameLen         Length of label name.
+ * \param File            The file to create the label in.
+ * \param LineNo          Line Number.
+ * \param AlwaysPreserve  Preserve the label regardless of optimization.
+ *
+ * @see llvm::DIBuilder::createLabel()
+ */
+LLVMMetadataRef LLVMDIBuilderCreateLabel(
+    LLVMDIBuilderRef Builder,
+    LLVMMetadataRef Context, const char *Name, size_t NameLen,
+    LLVMMetadataRef File, unsigned LineNo, LLVMBool AlwaysPreserve);
+
+/**
+ * Insert a new llvm.dbg.label intrinsic call
+ *
+ * \param Builder         The DIBuilder.
+ * \param LabelInfo       The Label's debug info descriptor
+ * \param Location        The debug info location
+ * \param InsertBefore    Location for the new intrinsic.
+ *
+ * @see llvm::DIBuilder::insertLabel()
+ */
+LLVMDbgRecordRef LLVMDIBuilderInsertLabelBefore(
+    LLVMDIBuilderRef Builder, LLVMMetadataRef LabelInfo,
+    LLVMMetadataRef Location, LLVMValueRef InsertBefore);
+
+/**
+ * Insert a new llvm.dbg.label intrinsic call
+ *
+ * \param Builder         The DIBuilder.
+ * \param LabelInfo       The Label's debug info descriptor
+ * \param Location        The debug info location
+ * \param InsertAtEnd     Location for the new intrinsic.
+ *
+ * @see llvm::DIBuilder::insertLabel()
+ */
+LLVMDbgRecordRef LLVMDIBuilderInsertLabelAtEnd(
+    LLVMDIBuilderRef Builder, LLVMMetadataRef LabelInfo,
+    LLVMMetadataRef Location, LLVMBasicBlockRef InsertAtEnd);
+
 /**
  * Obtain the enumerated type of a Metadata instance.
  *
diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h
index bf6b55923b84ba3..1139fd81cbd07f2 100644
--- a/llvm/include/llvm/ADT/ArrayRef.h
+++ b/llvm/include/llvm/ADT/ArrayRef.h
@@ -70,15 +70,16 @@ namespace llvm {
     /*implicit*/ ArrayRef(std::nullopt_t) {}
 
     /// Construct an ArrayRef from a single element.
-    /*implicit*/ ArrayRef(const T &OneElt)
-      : Data(&OneElt), Length(1) {}
+    /*implicit*/ ArrayRef(const T &OneElt LLVM_LIFETIME_BOUND)
+        : Data(&OneElt), Length(1) {}
 
     /// Construct an ArrayRef from a pointer and length.
-    constexpr /*implicit*/ ArrayRef(const T *data, size_t length)
+    constexpr /*implicit*/ ArrayRef(const T *data LLVM_LIFETIME_BOUND,
+                                    size_t length)
         : Data(data), Length(length) {}
 
     /// Construct an ArrayRef from a range.
-    constexpr ArrayRef(const T *begin, const T *end)
+    constexpr ArrayRef(const T *begin LLVM_LIFETIME_BOUND, const T *end)
         : Data(begin), Length(end - begin) {
       assert(begin <= end);
     }
@@ -103,7 +104,8 @@ namespace llvm {
 
     /// Construct an ArrayRef from a C array.
     template <size_t N>
-    /*implicit*/ constexpr ArrayRef(const T (&Arr)[N]) : Data(Arr), Length(N) {}
+    /*implicit*/ constexpr ArrayRef(const T (&Arr LLVM_LIFETIME_BOUND)[N])
+        : Data(Arr), Length(N) {}
 
     /// Construct an ArrayRef from a std::initializer_list.
 #if LLVM_GNUC_PREREQ(9, 0, 0)
@@ -113,7 +115,8 @@ namespace llvm {
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Winit-list-lifetime"
 #endif
-    constexpr /*implicit*/ ArrayRef(std::initializer_list<T> Vec)
+    constexpr /*implicit*/ ArrayRef(
+        std::initializer_list<T> Vec LLVM_LIFETIME_BOUND)
         : Data(Vec.begin() == Vec.end() ? (T *)nullptr : Vec.begin()),
           Length(Vec.size()) {}
 #if LLVM_GNUC_PREREQ(9, 0, 0)
diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h
index 3d2c5f428835587..41ba8bf8fde14b3 100644
--- a/llvm/include/llvm/ADT/GenericCycleImpl.h
+++ b/llvm/include/llvm/ADT/GenericCycleImpl.h
@@ -47,6 +47,11 @@ bool GenericCycle<ContextT>::contains(const GenericCycle *C) const {
 template <typename ContextT>
 void GenericCycle<ContextT>::getExitBlocks(
     SmallVectorImpl<BlockT *> &TmpStorage) const {
+  if (!ExitBlocksCache.empty()) {
+    TmpStorage = ExitBlocksCache;
+    return;
+  }
+
   TmpStorage.clear();
 
   size_t NumExitBlocks = 0;
@@ -65,6 +70,7 @@ void GenericCycle<ContextT>::getExitBlocks(
 
     TmpStorage.resize(NumExitBlocks);
   }
+  ExitBlocksCache.append(TmpStorage.begin(), TmpStorage.end());
 }
 
 template <typename ContextT>
@@ -298,6 +304,8 @@ void GenericCycleInfo<ContextT>::moveTopLevelCycleToNewParent(CycleT *NewParent,
   for (auto &It : BlockMapTopLevel)
     if (It.second == Child)
       It.second = NewParent;
+  NewParent->clearCache();
+  Child->clearCache();
 }
 
 template <typename ContextT>
@@ -316,6 +324,7 @@ void GenericCycleInfo<ContextT>::addBlockToCycle(BlockT *Block, CycleT *Cycle) {
   }
 
   BlockMapTopLevel.try_emplace(Block, Cycle);
+  Cycle->clearCache();
 }
 
 /// \brief Main function of the cycle info computations.
diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h
index 8c2fa0490e638a3..b8b6e3e9967a4a3 100644
--- a/llvm/include/llvm/ADT/GenericCycleInfo.h
+++ b/llvm/include/llvm/ADT/GenericCycleInfo.h
@@ -74,16 +74,27 @@ template <typename ContextT> class GenericCycle {
   ///       always have the same depth.
   unsigned Depth = 0;
 
+  /// Cache for the results of getExitBlocks().
+  mutable SmallVector<BlockT *, 4> ExitBlocksCache;
+
   void clear() {
     Entries.clear();
     Children.clear();
     Blocks.clear();
     Depth = 0;
     ParentCycle = nullptr;
+    clearCache();
+  }
+
+  void appendEntry(BlockT *Block) {
+    Entries.push_back(Block);
+    clearCache();
   }
 
-  void appendEntry(BlockT *Block) { Entries.push_back(Block); }
-  void appendBlock(BlockT *Block) { Blocks.insert(Block); }
+  void appendBlock(BlockT *Block) {
+    Blocks.insert(Block);
+    clearCache();
+  }
 
   GenericCycle(const GenericCycle &) = delete;
   GenericCycle &operator=(const GenericCycle &) = delete;
@@ -102,6 +113,11 @@ template <typename ContextT> class GenericCycle {
     return Entries;
   }
 
+  /// Clear the cache of the cycle.
+  /// This should be called from every non-const function in GenericCycle
+  /// and GenericCycleInfo.
+  void clearCache() const { ExitBlocksCache.clear(); }
+
   /// \brief Return whether \p Block is an entry block of the cycle.
   bool isEntry(const BlockT *Block) const {
     return is_contained(Entries, Block);
@@ -112,6 +128,7 @@ template <typename ContextT> class GenericCycle {
     assert(contains(Block));
     Entries.clear();
     Entries.push_back(Block);
+    clearCache();
   }
 
   /// \brief Return whether \p Block is contained in the cycle.
diff --git a/llvm/include/llvm/ADT/StringMapEntry.h b/llvm/include/llvm/ADT/StringMapEntry.h
index 98b51cc1aebd59e..d93af5aedc39d70 100644
--- a/llvm/include/llvm/ADT/StringMapEntry.h
+++ b/llvm/include/llvm/ADT/StringMapEntry.h
@@ -116,9 +116,7 @@ class StringMapEntry final : public StringMapEntryStorage<ValueTy> {
     return reinterpret_cast<const char *>(this + 1);
   }
 
-  StringRef first() const {
-    return StringRef(getKeyData(), this->getKeyLength());
-  }
+  StringRef first() const { return getKey(); }
 
   /// Create a StringMapEntry for the specified key construct the value using
   /// \p InitiVals.
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 17ab10b9181f1ab..5b525c8e56ecc9f 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -85,7 +85,7 @@ namespace llvm {
     StringRef(std::nullptr_t) = delete;
 
     /// Construct a string ref from a cstring.
-    /*implicit*/ constexpr StringRef(const char *Str)
+    /*implicit*/ constexpr StringRef(const char *Str LLVM_LIFETIME_BOUND)
         : Data(Str), Length(Str ?
     // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen.
 #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8
@@ -97,12 +97,13 @@ namespace llvm {
     }
 
     /// Construct a string ref from a pointer and length.
-    /*implicit*/ constexpr StringRef(const char *data, size_t length)
+    /*implicit*/ constexpr StringRef(const char *data LLVM_LIFETIME_BOUND,
+                                     size_t length)
         : Data(data), Length(length) {}
 
     /// Construct a string ref from an std::string.
     /*implicit*/ StringRef(const std::string &Str)
-      : Data(Str.data()), Length(Str.length()) {}
+        : Data(Str.data()), Length(Str.length()) {}
 
     /// Construct a string ref from an std::string_view.
     /*implicit*/ constexpr StringRef(std::string_view Str)
@@ -112,9 +113,9 @@ namespace llvm {
     /// @name Iterators
     /// @{
 
-    iterator begin() const { return Data; }
+    iterator begin() const { return data(); }
 
-    iterator end() const { return Data + Length; }
+    iterator end() const { return data() + size(); }
 
     reverse_iterator rbegin() const {
       return std::make_reverse_iterator(end());
@@ -143,7 +144,7 @@ namespace llvm {
     [[nodiscard]] constexpr const char *data() const { return Data; }
 
     /// empty - Check if the string is empty.
-    [[nodiscard]] constexpr bool empty() const { return Length == 0; }
+    [[nodiscard]] constexpr bool empty() const { return size() == 0; }
 
     /// size - Get the string size.
     [[nodiscard]] constexpr size_t size() const { return Length; }
@@ -151,13 +152,13 @@ namespace llvm {
     /// front - Get the first character in the string.
     [[nodiscard]] char front() const {
       assert(!empty());
-      return Data[0];
+      return data()[0];
     }
 
     /// back - Get the last character in the string.
     [[nodiscard]] char back() const {
       assert(!empty());
-      return Data[Length-1];
+      return data()[size() - 1];
     }
 
     // copy - Allocate copy in Allocator and return StringRef to it.
@@ -166,14 +167,14 @@ namespace llvm {
       // Don't request a length 0 copy from the allocator.
       if (empty())
         return StringRef();
-      char *S = A.template Allocate<char>(Length);
+      char *S = A.template Allocate<char>(size());
       std::copy(begin(), end(), S);
-      return StringRef(S, Length);
+      return StringRef(S, size());
     }
 
     /// Check for string equality, ignoring case.
     [[nodiscard]] bool equals_insensitive(StringRef RHS) const {
-      return Length == RHS.Length && compare_insensitive(RHS) == 0;
+      return size() == RHS.size() && compare_insensitive(RHS) == 0;
     }
 
     /// compare - Compare two strings; the result is negative, zero, or positive
@@ -181,13 +182,14 @@ namespace llvm {
     /// the \p RHS.
     [[nodiscard]] int compare(StringRef RHS) const {
       // Check the prefix for a mismatch.
-      if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length)))
+      if (int Res =
+              compareMemory(data(), RHS.data(), std::min(size(), RHS.size())))
         return Res < 0 ? -1 : 1;
 
       // Otherwise the prefixes match, so we only need to check the lengths.
-      if (Length == RHS.Length)
+      if (size() == RHS.size())
         return 0;
-      return Length < RHS.Length ? -1 : 1;
+      return size() < RHS.size() ? -1 : 1;
     }
 
     /// Compare two strings, ignoring case.
@@ -225,8 +227,9 @@ namespace llvm {
 
     /// str - Get the contents as an std::string.
     [[nodiscard]] std::string str() const {
-      if (!Data) return std::string();
-      return std::string(Data, Length);
+      if (!data())
+        return std::string();
+      return std::string(data(), size());
     }
 
     /// @}
@@ -234,8 +237,8 @@ namespace llvm {
     /// @{
 
     [[nodiscard]] char operator[](size_t Index) const {
-      assert(Index < Length && "Invalid index!");
-      return Data[Index];
+      assert(Index < size() && "Invalid index!");
+      return data()[Index];
     }
 
     /// Disallow accidental assignment from a temporary std::string.
@@ -260,8 +263,8 @@ namespace llvm {
 
     /// Check if this string starts with the given \p Prefix.
     [[nodiscard]] bool starts_with(StringRef Prefix) const {
-      return Length >= Prefix.Length &&
-             compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
+      return size() >= Prefix.size() &&
+             compareMemory(data(), Prefix.data(), Prefix.size()) == 0;
     }
     [[nodiscard]] bool starts_with(char Prefix) const {
       return !empty() && front() == Prefix;
@@ -272,9 +275,9 @@ namespace llvm {
 
     /// Check if this string ends with the given \p Suffix.
     [[nodiscard]] bool ends_with(StringRef Suffix) const {
-      return Length >= Suffix.Length &&
-             compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) ==
-                 0;
+      return size() >= Suffix.size() &&
+             compareMemory(end() - Suffix.size(), Suffix.data(),
+                           Suffix.size()) == 0;
     }
     [[nodiscard]] bool ends_with(char Suffix) const {
       return !empty() && back() == Suffix;
@@ -342,10 +345,10 @@ namespace llvm {
     /// \returns The index of the last occurrence of \p C, or npos if not
     /// found.
     [[nodiscard]] size_t rfind(char C, size_t From = npos) const {
-      size_t I = std::min(From, Length);
+      size_t I = std::min(From, size());
       while (I) {
         --I;
-        if (Data[I] == C)
+        if (data()[I] == C)
           return I;
       }
       return npos;
@@ -447,8 +450,8 @@ namespace llvm {
     /// Return the number of occurrences of \p C in the string.
     [[nodiscard]] size_t count(char C) const {
       size_t Count = 0;
-      for (size_t I = 0; I != Length; ++I)
-        if (Data[I] == C)
+      for (size_t I = 0; I != size(); ++I)
+        if (data()[I] == C)
           ++Count;
       return Count;
     }
@@ -567,8 +570,8 @@ namespace llvm {
     /// suffix (starting with \p Start) will be returned.
     [[nodiscard]] constexpr StringRef substr(size_t Start,
                                              size_t N = npos) const {
-      Start = std::min(Start, Length);
-      return StringRef(Data + Start, std::min(N, Length - Start));
+      Start = std::min(Start, size());
+      return StringRef(data() + Start, std::min(N, size() - Start));
     }
 
     /// Return a StringRef equal to 'this' but with only the first \p N
@@ -679,9 +682,9 @@ namespace llvm {
     /// will be returned. If this is less than \p Start, an empty string will
     /// be returned.
     [[nodiscard]] StringRef slice(size_t Start, size_t End) const {
-      Start = std::min(Start, Length);
-      End = std::clamp(End, Start, Length);
-      return StringRef(Data + Start, End - Start);
+      Start = std::min(Start, size());
+      End = std::clamp(End, Start, size());
+      return StringRef(data() + Start, End - Start);
     }
 
     /// Split into two substrings around the first occurrence of a separator
@@ -786,25 +789,25 @@ namespace llvm {
     /// Return string with consecutive \p Char characters starting from the
     /// the left removed.
     [[nodiscard]] StringRef ltrim(char Char) const {
-      return drop_front(std::min(Length, find_first_not_of(Char)));
+      return drop_front(std::min(size(), find_first_not_of(Char)));
     }
 
     /// Return string with consecutive characters in \p Chars starting from
     /// the left removed.
     [[nodiscard]] StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
-      return drop_front(std::min(Length, find_first_not_of(Chars)));
+      return drop_front(std::min(size(), find_first_not_of(Chars)));
     }
 
     /// Return string with consecutive \p Char characters starting from the
     /// right removed.
     [[nodiscard]] StringRef rtrim(char Char) const {
-      return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
+      return drop_back(size() - std::min(size(), find_last_not_of(Char) + 1));
     }
 
     /// Return string with consecutive characters in \p Chars starting from
     /// the right removed.
     [[nodiscard]] StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
-      return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
+      return drop_back(size() - std::min(size(), find_last_not_of(Chars) + 1));
     }
 
     /// Return string with consecutive \p Char characters starting from the
@@ -831,9 +834,9 @@ namespace llvm {
         // If there is no carriage return, assume unix
         return "\n";
       }
-      if (Pos + 1 < Length && Data[Pos + 1] == '\n')
+      if (Pos + 1 < size() && data()[Pos + 1] == '\n')
         return "\r\n"; // Windows
-      if (Pos > 0 && Data[Pos - 1] == '\n')
+      if (Pos > 0 && data()[Pos - 1] == '\n')
         return "\n\r"; // You monster!
       return "\r";     // Classic Mac
     }
diff --git a/llvm/include/llvm/ADT/TrieHashIndexGenerator.h b/llvm/include/llvm/ADT/TrieHashIndexGenerator.h
new file mode 100644
index 000000000000000..6f7e53b6b11b539
--- /dev/null
+++ b/llvm/include/llvm/ADT/TrieHashIndexGenerator.h
@@ -0,0 +1,122 @@
+//===- TrieHashIndexGenerator.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_TRIEHASHINDEXGENERATOR_H
+#define LLVM_ADT_TRIEHASHINDEXGENERATOR_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include <optional>
+
+namespace llvm {
+
+/// A utility class that helps compute the index of an object inside a trie
+/// from its hash. The generator can be configured with the number of bits
+/// used for each level of the trie structure with \c NumRootBits and \c
+/// NumSubtrieBits.
+/// For example, try computing indexes for a 16-bit hash 0x1234 with 8-bit root
+/// and 4-bit sub-trie:
+///
+///   TrieHashIndexGenerator IndexGen{8, 4, Hash};
+///   size_t index1 = IndexGen.next(); // index 18 in root node.
+///   size_t index2 = IndexGen.next(); // index 3 in sub-trie level 1.
+///   size_t index3 = IndexGen.next(); // index 4 in sub-trie level 2.
+///
+/// This is used by different trie implementations to figure out where to
+/// insert/find the object in the data structure.
+struct TrieHashIndexGenerator {
+  size_t NumRootBits;
+  size_t NumSubtrieBits;
+  ArrayRef<uint8_t> Bytes;
+  std::optional<size_t> StartBit = std::nullopt;
+
+  // Get the number of bits used to generate current index.
+  size_t getNumBits() const {
+    assert(StartBit);
+    size_t TotalNumBits = Bytes.size() * 8;
+    assert(*StartBit <= TotalNumBits);
+    return std::min(*StartBit ? NumSubtrieBits : NumRootBits,
+                    TotalNumBits - *StartBit);
+  }
+
+  // Get the index of the object in the next level of trie.
+  size_t next() {
+    if (!StartBit) {
+      // Compute index for root when StartBit is not set.
+      StartBit = 0;
+      return getIndex(Bytes, *StartBit, NumRootBits);
+    }
+    if (*StartBit < Bytes.size() * 8) {
+      // Compute index for sub-trie.
+      *StartBit += *StartBit ? NumSubtrieBits : NumRootBits;
+      assert((*StartBit - NumRootBits) % NumSubtrieBits == 0);
+      return getIndex(Bytes, *StartBit, NumSubtrieBits);
+    }
+    // All the bits are consumed.
+    return end();
+  }
+
+  // Provide a hint to speed up index generation by supplying information
+  // about the hash at the current level. For example, if the object is known
+  // to have \c Index on a level that has already consumed the first \c Bit
+  // bits of the hash, index generation can start from this level by calling
+  // the \c hint function.
+  size_t hint(unsigned Index, unsigned Bit) {
+    assert(Bit < Bytes.size() * 8);
+    assert(Bit == 0 || (Bit - NumRootBits) % NumSubtrieBits == 0);
+    StartBit = Bit;
+    return Index;
+  }
+
+  // Utility function for looking up the index in the trie for an object whose
+  // hash collides in its leading bits with the hash of the object that is
+  // currently being computed.
+  size_t getCollidingBits(ArrayRef<uint8_t> CollidingBits) const {
+    assert(StartBit);
+    return getIndex(CollidingBits, *StartBit, NumSubtrieBits);
+  }
+
+  size_t end() const { return SIZE_MAX; }
+
+  // Compute the index for the object from its hash, current start bits, and
+  // the number of bits used for current level.
+  static size_t getIndex(ArrayRef<uint8_t> Bytes, size_t StartBit,
+                         size_t NumBits) {
+    assert(StartBit < Bytes.size() * 8);
+    // Drop all the bits before StartBit.
+    Bytes = Bytes.drop_front(StartBit / 8u);
+    StartBit %= 8u;
+    size_t Index = 0;
+    // Compute the index using the bits in range [StartBit, StartBit + NumBits),
+    // note that the range can span several `uint8_t` elements of the array.
+    for (uint8_t Byte : Bytes) {
+      size_t ByteStart = 0, ByteEnd = 8;
+      if (StartBit) {
+        ByteStart = StartBit;
+        Byte &= (1u << (8 - StartBit)) - 1u;
+        StartBit = 0;
+      }
+      size_t CurrentNumBits = ByteEnd - ByteStart;
+      if (CurrentNumBits > NumBits) {
+        Byte >>= CurrentNumBits - NumBits;
+        CurrentNumBits = NumBits;
+      }
+      Index <<= CurrentNumBits;
+      Index |= Byte & ((1u << CurrentNumBits) - 1u);
+
+      assert(NumBits >= CurrentNumBits);
+      NumBits -= CurrentNumBits;
+      if (!NumBits)
+        break;
+    }
+    return Index;
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_ADT_TRIEHASHINDEXGENERATOR_H
diff --git a/llvm/include/llvm/ADT/TrieRawHashMap.h b/llvm/include/llvm/ADT/TrieRawHashMap.h
new file mode 100644
index 000000000000000..5bfe5c9e6a0f495
--- /dev/null
+++ b/llvm/include/llvm/ADT/TrieRawHashMap.h
@@ -0,0 +1,377 @@
+//===- TrieRawHashMap.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_TRIERAWHASHMAP_H
+#define LLVM_ADT_TRIERAWHASHMAP_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include <atomic>
+#include <optional>
+
+namespace llvm {
+
+class raw_ostream;
+
+/// TrieRawHashMap - a lock-free thread-safe trie that can be used to
+/// store/index data based on a hash value. It can be customized to work with
+/// any hash algorithm or store any data.
+///
+/// Data structure:
+/// Data node stored in the Trie contains both hash and data:
+/// struct {
+///    HashT Hash;
+///    DataT Data;
+/// };
+///
+/// Data is stored/indexed via a prefix tree, where each node in the tree can be
+/// either the root, a sub-trie or a data node. Assuming a 4-bit hash and two
+/// data objects {0001, A} and {0100, B}, it can be stored in a trie
+/// (assuming Root has 2 bits, SubTrie has 1 bit):
+///  +--------+
+///  |Root[00]| -> {0001, A}
+///  |    [01]| -> {0100, B}
+///  |    [10]| (empty)
+///  |    [11]| (empty)
+///  +--------+
+///
+/// Inserting a new object {0010, C} will result in:
+///  +--------+    +----------+
+///  |Root[00]| -> |SubTrie[0]| -> {0001, A}
+///  |        |    |       [1]| -> {0010, C}
+///  |        |    +----------+
+///  |    [01]| -> {0100, B}
+///  |    [10]| (empty)
+///  |    [11]| (empty)
+///  +--------+
+/// Note object A is sunk down to a sub-trie during the insertion. All the
+/// nodes are inserted through compare-exchange to ensure the insertion is
+/// thread-safe and lock-free.
+///
+/// To find an object in the trie, walk the tree with the prefix of the hash
+/// until the data node is found. Then the hash is compared with the hash stored
+/// in the data node to see if it is the same object.
+///
+/// Hash collisions are not allowed, so it is recommended to use the trie with
+/// a "strong" hashing algorithm. A well-distributed hash can also result in
+/// better performance and memory usage.
+///
+/// It currently does not support iteration and deletion.
+
+/// Base class for a lock-free thread-safe hash-mapped trie.
+class ThreadSafeTrieRawHashMapBase {
+public:
+  static constexpr size_t TrieContentBaseSize = 4;
+  static constexpr size_t DefaultNumRootBits = 6;
+  static constexpr size_t DefaultNumSubtrieBits = 4;
+
+private:
+  template <class T> struct AllocValueType {
+    char Base[TrieContentBaseSize];
+    std::aligned_union_t<sizeof(T), T> Content;
+  };
+
+protected:
+  template <class T>
+  static constexpr size_t DefaultContentAllocSize = sizeof(AllocValueType<T>);
+
+  template <class T>
+  static constexpr size_t DefaultContentAllocAlign = alignof(AllocValueType<T>);
+
+  template <class T>
+  static constexpr size_t DefaultContentOffset =
+      offsetof(AllocValueType<T>, Content);
+
+public:
+  static void *operator new(size_t Size) { return ::operator new(Size); }
+  void operator delete(void *Ptr) { ::operator delete(Ptr); }
+
+  LLVM_DUMP_METHOD void dump() const;
+  void print(raw_ostream &OS) const;
+
+protected:
+  /// Result of a lookup. Suitable for an insertion hint. Maybe could be
+  /// expanded into an iterator of sorts, but likely not useful (visiting
+  /// everything in the trie should probably be done some way other than
+  /// through an iterator pattern).
+  class PointerBase {
+  protected:
+    void *get() const { return I == -2u ? P : nullptr; }
+
+  public:
+    PointerBase() noexcept = default;
+
+  private:
+    friend class ThreadSafeTrieRawHashMapBase;
+    explicit PointerBase(void *Content) : P(Content), I(-2u) {}
+    PointerBase(void *P, unsigned I, unsigned B) : P(P), I(I), B(B) {}
+
+    bool isHint() const { return I != -1u && I != -2u; }
+
+    void *P = nullptr;
+    unsigned I = -1u;
+    unsigned B = 0;
+  };
+
+  /// Find the stored content with hash.
+  PointerBase find(ArrayRef<uint8_t> Hash) const;
+
+  /// Insert and return the stored content.
+  PointerBase
+  insert(PointerBase Hint, ArrayRef<uint8_t> Hash,
+         function_ref<const uint8_t *(void *Mem, ArrayRef<uint8_t> Hash)>
+             Constructor);
+
+  ThreadSafeTrieRawHashMapBase() = delete;
+
+  ThreadSafeTrieRawHashMapBase(
+      size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset,
+      std::optional<size_t> NumRootBits = std::nullopt,
+      std::optional<size_t> NumSubtrieBits = std::nullopt);
+
+  /// Destructor, which asserts if there's anything to do. Subclasses should
+  /// call \a destroyImpl().
+  ///
+  /// \pre \a destroyImpl() was already called.
+  ~ThreadSafeTrieRawHashMapBase();
+  void destroyImpl(function_ref<void(void *ValueMem)> Destructor);
+
+  ThreadSafeTrieRawHashMapBase(ThreadSafeTrieRawHashMapBase &&RHS);
+
+  // Move assignment is not supported as it is not thread-safe.
+  ThreadSafeTrieRawHashMapBase &
+  operator=(ThreadSafeTrieRawHashMapBase &&RHS) = delete;
+
+  // No copy.
+  ThreadSafeTrieRawHashMapBase(const ThreadSafeTrieRawHashMapBase &) = delete;
+  ThreadSafeTrieRawHashMapBase &
+  operator=(const ThreadSafeTrieRawHashMapBase &) = delete;
+
+  // Debug functions. Implementation details and not guaranteed to be
+  // thread-safe.
+  PointerBase getRoot() const;
+  unsigned getStartBit(PointerBase P) const;
+  unsigned getNumBits(PointerBase P) const;
+  unsigned getNumSlotUsed(PointerBase P) const;
+  std::string getTriePrefixAsString(PointerBase P) const;
+  unsigned getNumTries() const;
+  // Visit next trie in the allocation chain.
+  PointerBase getNextTrie(PointerBase P) const;
+
+private:
+  friend class TrieRawHashMapTestHelper;
+  const unsigned short ContentAllocSize;
+  const unsigned short ContentAllocAlign;
+  const unsigned short ContentOffset;
+  unsigned short NumRootBits;
+  unsigned short NumSubtrieBits;
+  class ImplType;
+  // ImplPtr is owned by ThreadSafeTrieRawHashMapBase and needs to be freed in
+  // destroyImpl.
+  std::atomic<ImplType *> ImplPtr;
+  ImplType &getOrCreateImpl();
+  ImplType *getImpl() const;
+};
+
+/// Lock-free thread-safe hash-mapped trie.
+template <class T, size_t NumHashBytes>
+class ThreadSafeTrieRawHashMap : public ThreadSafeTrieRawHashMapBase {
+public:
+  using HashT = std::array<uint8_t, NumHashBytes>;
+
+  class LazyValueConstructor;
+  struct value_type {
+    const HashT Hash;
+    T Data;
+
+    value_type(value_type &&) = default;
+    value_type(const value_type &) = default;
+
+    value_type(ArrayRef<uint8_t> Hash, const T &Data)
+        : Hash(makeHash(Hash)), Data(Data) {}
+    value_type(ArrayRef<uint8_t> Hash, T &&Data)
+        : Hash(makeHash(Hash)), Data(std::move(Data)) {}
+
+  private:
+    friend class LazyValueConstructor;
+
+    struct EmplaceTag {};
+    template <class... ArgsT>
+    value_type(ArrayRef<uint8_t> Hash, EmplaceTag, ArgsT &&...Args)
+        : Hash(makeHash(Hash)), Data(std::forward<ArgsT>(Args)...) {}
+
+    static HashT makeHash(ArrayRef<uint8_t> HashRef) {
+      HashT Hash;
+      std::copy(HashRef.begin(), HashRef.end(), Hash.data());
+      return Hash;
+    }
+  };
+
+  using ThreadSafeTrieRawHashMapBase::operator delete;
+  using HashType = HashT;
+
+  using ThreadSafeTrieRawHashMapBase::dump;
+  using ThreadSafeTrieRawHashMapBase::print;
+
+private:
+  template <class ValueT> class PointerImpl : PointerBase {
+    friend class ThreadSafeTrieRawHashMap;
+
+    ValueT *get() const {
+      return reinterpret_cast<ValueT *>(PointerBase::get());
+    }
+
+  public:
+    ValueT &operator*() const {
+      assert(get());
+      return *get();
+    }
+    ValueT *operator->() const {
+      assert(get());
+      return get();
+    }
+    explicit operator bool() const { return get(); }
+
+    PointerImpl() = default;
+
+  protected:
+    PointerImpl(PointerBase Result) : PointerBase(Result) {}
+  };
+
+public:
+  class pointer;
+  class const_pointer;
+  class pointer : public PointerImpl<value_type> {
+    friend class ThreadSafeTrieRawHashMap;
+    friend class const_pointer;
+
+  public:
+    pointer() = default;
+
+  private:
+    pointer(PointerBase Result) : pointer::PointerImpl(Result) {}
+  };
+
+  class const_pointer : public PointerImpl<const value_type> {
+    friend class ThreadSafeTrieRawHashMap;
+
+  public:
+    const_pointer() = default;
+    const_pointer(const pointer &P) : const_pointer::PointerImpl(P) {}
+
+  private:
+    const_pointer(PointerBase Result) : const_pointer::PointerImpl(Result) {}
+  };
+
+  class LazyValueConstructor {
+  public:
+    value_type &operator()(T &&RHS) {
+      assert(Mem && "Constructor already called, or moved away");
+      return assign(::new (Mem) value_type(Hash, std::move(RHS)));
+    }
+    value_type &operator()(const T &RHS) {
+      assert(Mem && "Constructor already called, or moved away");
+      return assign(::new (Mem) value_type(Hash, RHS));
+    }
+    template <class... ArgsT> value_type &emplace(ArgsT &&...Args) {
+      assert(Mem && "Constructor already called, or moved away");
+      return assign(::new (Mem)
+                        value_type(Hash, typename value_type::EmplaceTag{},
+                                   std::forward<ArgsT>(Args)...));
+    }
+
+    LazyValueConstructor(LazyValueConstructor &&RHS)
+        : Mem(RHS.Mem), Result(RHS.Result), Hash(RHS.Hash) {
+      RHS.Mem = nullptr; // Moved away, cannot call.
+    }
+    ~LazyValueConstructor() { assert(!Mem && "Constructor never called!"); }
+
+  private:
+    value_type &assign(value_type *V) {
+      Mem = nullptr;
+      Result = V;
+      return *V;
+    }
+    friend class ThreadSafeTrieRawHashMap;
+    LazyValueConstructor() = delete;
+    LazyValueConstructor(void *Mem, value_type *&Result, ArrayRef<uint8_t> Hash)
+        : Mem(Mem), Result(Result), Hash(Hash) {
+      assert(Hash.size() == sizeof(HashT) && "Invalid hash");
+      assert(Mem && "Invalid memory for construction");
+    }
+    void *Mem;
+    value_type *&Result;
+    ArrayRef<uint8_t> Hash;
+  };
+
+  /// Insert with a hint. Default-constructed hint will work, but it's
+  /// recommended to start with a lookup to avoid overhead in object creation
+  /// if it already exists.
+  pointer insertLazy(const_pointer Hint, ArrayRef<uint8_t> Hash,
+                     function_ref<void(LazyValueConstructor)> OnConstruct) {
+    return pointer(ThreadSafeTrieRawHashMapBase::insert(
+        Hint, Hash, [&](void *Mem, ArrayRef<uint8_t> Hash) {
+          value_type *Result = nullptr;
+          OnConstruct(LazyValueConstructor(Mem, Result, Hash));
+          return Result->Hash.data();
+        }));
+  }
+
+  pointer insertLazy(ArrayRef<uint8_t> Hash,
+                     function_ref<void(LazyValueConstructor)> OnConstruct) {
+    return insertLazy(const_pointer(), Hash, OnConstruct);
+  }
+
+  pointer insert(const_pointer Hint, value_type &&HashedData) {
+    return insertLazy(Hint, HashedData.Hash, [&](LazyValueConstructor C) {
+      C(std::move(HashedData.Data));
+    });
+  }
+
+  pointer insert(const_pointer Hint, const value_type &HashedData) {
+    return insertLazy(Hint, HashedData.Hash,
+                      [&](LazyValueConstructor C) { C(HashedData.Data); });
+  }
+
+  pointer find(ArrayRef<uint8_t> Hash) {
+    assert(Hash.size() == std::tuple_size<HashT>::value);
+    return ThreadSafeTrieRawHashMapBase::find(Hash);
+  }
+
+  const_pointer find(ArrayRef<uint8_t> Hash) const {
+    assert(Hash.size() == std::tuple_size<HashT>::value);
+    return ThreadSafeTrieRawHashMapBase::find(Hash);
+  }
+
+  ThreadSafeTrieRawHashMap(std::optional<size_t> NumRootBits = std::nullopt,
+                           std::optional<size_t> NumSubtrieBits = std::nullopt)
+      : ThreadSafeTrieRawHashMapBase(DefaultContentAllocSize<value_type>,
+                                     DefaultContentAllocAlign<value_type>,
+                                     DefaultContentOffset<value_type>,
+                                     NumRootBits, NumSubtrieBits) {}
+
+  ~ThreadSafeTrieRawHashMap() {
+    if constexpr (std::is_trivially_destructible<value_type>::value)
+      this->destroyImpl(nullptr);
+    else
+      this->destroyImpl(
+          [](void *P) { static_cast<value_type *>(P)->~value_type(); });
+  }
+
+  // Move constructor okay.
+  ThreadSafeTrieRawHashMap(ThreadSafeTrieRawHashMap &&) = default;
+
+  // No move assignment or any copy.
+  ThreadSafeTrieRawHashMap &operator=(ThreadSafeTrieRawHashMap &&) = delete;
+  ThreadSafeTrieRawHashMap(const ThreadSafeTrieRawHashMap &) = delete;
+  ThreadSafeTrieRawHashMap &
+  operator=(const ThreadSafeTrieRawHashMap &) = delete;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ADT_TRIERAWHASHMAP_H
diff --git a/llvm/include/llvm/Analysis/StructuralHash.h b/llvm/include/llvm/Analysis/StructuralHash.h
index 9f33c69aed345c9..4c9f063bc7d2c8c 100644
--- a/llvm/include/llvm/Analysis/StructuralHash.h
+++ b/llvm/include/llvm/Analysis/StructuralHash.h
@@ -13,15 +13,22 @@
 
 namespace llvm {
 
+enum class StructuralHashOptions {
+  None,              ///< Hash with opcode only.
+  Detailed,          ///< Hash with opcode and operands.
+  CallTargetIgnored, ///< Ignore call target operand when computing hash.
+};
+
 /// Printer pass for  StructuralHashes
 class StructuralHashPrinterPass
     : public PassInfoMixin<StructuralHashPrinterPass> {
   raw_ostream &OS;
-  bool EnableDetailedStructuralHash;
+  const StructuralHashOptions Options;
 
 public:
-  explicit StructuralHashPrinterPass(raw_ostream &OS, bool Detailed)
-      : OS(OS), EnableDetailedStructuralHash(Detailed) {}
+  explicit StructuralHashPrinterPass(raw_ostream &OS,
+                                     StructuralHashOptions Options)
+      : OS(OS), Options(Options) {}
 
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
 
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index f890e2b9ec4c82a..3e23e398f6a7976 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -1140,6 +1140,21 @@ TLI_DEFINE_ENUM_INTERNAL(erfl)
 TLI_DEFINE_STRING_INTERNAL("erfl")
 TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl)
 
+/// double tgamma(double x);
+TLI_DEFINE_ENUM_INTERNAL(tgamma)
+TLI_DEFINE_STRING_INTERNAL("tgamma")
+TLI_DEFINE_SIG_INTERNAL(Dbl, Dbl)
+
+/// float tgammaf(float x);
+TLI_DEFINE_ENUM_INTERNAL(tgammaf)
+TLI_DEFINE_STRING_INTERNAL("tgammaf")
+TLI_DEFINE_SIG_INTERNAL(Flt, Flt)
+
+/// long double tgammal(long double x);
+TLI_DEFINE_ENUM_INTERNAL(tgammal)
+TLI_DEFINE_STRING_INTERNAL("tgammal")
+TLI_DEFINE_SIG_INTERNAL(LDbl, LDbl)
+
 /// int execl(const char *path, const char *arg, ...);
 TLI_DEFINE_ENUM_INTERNAL(execl)
 TLI_DEFINE_STRING_INTERNAL("execl")
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 9e543b844ad768f..5347c64e43e718f 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -408,35 +408,36 @@ class TargetLibraryInfo {
     switch (F) {
     default: break;
       // clang-format off
-    case LibFunc_copysign:     case LibFunc_copysignf:  case LibFunc_copysignl:
-    case LibFunc_fabs:         case LibFunc_fabsf:      case LibFunc_fabsl:
-    case LibFunc_sin:          case LibFunc_sinf:       case LibFunc_sinl:
-    case LibFunc_cos:          case LibFunc_cosf:       case LibFunc_cosl:
-    case LibFunc_tan:          case LibFunc_tanf:       case LibFunc_tanl:
-    case LibFunc_asin:         case LibFunc_asinf:      case LibFunc_asinl:
     case LibFunc_acos:         case LibFunc_acosf:      case LibFunc_acosl:
+    case LibFunc_asin:         case LibFunc_asinf:      case LibFunc_asinl:
     case LibFunc_atan:         case LibFunc_atanf:      case LibFunc_atanl:
-    case LibFunc_sinh:         case LibFunc_sinhf:      case LibFunc_sinhl:
+    case LibFunc_ceil:         case LibFunc_ceilf:      case LibFunc_ceill:
+    case LibFunc_copysign:     case LibFunc_copysignf:  case LibFunc_copysignl:
+    case LibFunc_cos:          case LibFunc_cosf:       case LibFunc_cosl:
     case LibFunc_cosh:         case LibFunc_coshf:      case LibFunc_coshl:
-    case LibFunc_tanh:         case LibFunc_tanhf:      case LibFunc_tanhl:
-    case LibFunc_sqrt:         case LibFunc_sqrtf:      case LibFunc_sqrtl:
-    case LibFunc_sqrt_finite:  case LibFunc_sqrtf_finite:
-                                                   case LibFunc_sqrtl_finite:
+    case LibFunc_exp2:         case LibFunc_exp2f:      case LibFunc_exp2l:
+    case LibFunc_exp10:        case LibFunc_exp10f:     case LibFunc_exp10l:
+    case LibFunc_fabs:         case LibFunc_fabsf:      case LibFunc_fabsl:
+    case LibFunc_floor:        case LibFunc_floorf:     case LibFunc_floorl:
     case LibFunc_fmax:         case LibFunc_fmaxf:      case LibFunc_fmaxl:
     case LibFunc_fmin:         case LibFunc_fminf:      case LibFunc_fminl:
-    case LibFunc_floor:        case LibFunc_floorf:     case LibFunc_floorl:
+    case LibFunc_ldexp:        case LibFunc_ldexpf:     case LibFunc_ldexpl:
+    case LibFunc_log2:         case LibFunc_log2f:      case LibFunc_log2l:
+    case LibFunc_memcmp:       case LibFunc_bcmp:       case LibFunc_strcmp:
+    case LibFunc_memcpy:       case LibFunc_memset:     case LibFunc_memmove:
     case LibFunc_nearbyint:    case LibFunc_nearbyintf: case LibFunc_nearbyintl:
-    case LibFunc_ceil:         case LibFunc_ceilf:      case LibFunc_ceill:
     case LibFunc_rint:         case LibFunc_rintf:      case LibFunc_rintl:
     case LibFunc_round:        case LibFunc_roundf:     case LibFunc_roundl:
-    case LibFunc_trunc:        case LibFunc_truncf:     case LibFunc_truncl:
-    case LibFunc_log2:         case LibFunc_log2f:      case LibFunc_log2l:
-    case LibFunc_exp2:         case LibFunc_exp2f:      case LibFunc_exp2l:
-    case LibFunc_ldexp:        case LibFunc_ldexpf:     case LibFunc_ldexpl:
-    case LibFunc_memcpy:       case LibFunc_memset:     case LibFunc_memmove:
-    case LibFunc_memcmp:       case LibFunc_bcmp:       case LibFunc_strcmp:
+    case LibFunc_sin:          case LibFunc_sinf:       case LibFunc_sinl:
+    case LibFunc_sinh:         case LibFunc_sinhf:      case LibFunc_sinhl:
+    case LibFunc_sqrt:         case LibFunc_sqrtf:      case LibFunc_sqrtl:
+    case LibFunc_sqrt_finite:  case LibFunc_sqrtf_finite:
+                                                   case LibFunc_sqrtl_finite:
     case LibFunc_strcpy:       case LibFunc_stpcpy:     case LibFunc_strlen:
     case LibFunc_strnlen:      case LibFunc_memchr:     case LibFunc_mempcpy:
+    case LibFunc_tan:          case LibFunc_tanf:       case LibFunc_tanl:
+    case LibFunc_tanh:         case LibFunc_tanhf:      case LibFunc_tanhl:
+    case LibFunc_trunc:        case LibFunc_truncf:     case LibFunc_truncl:
       // clang-format on
       return true;
     }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 0b7792f89a05c43..317c13917c0cfc8 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -177,7 +177,8 @@ class TargetTransformInfoImplBase {
         Name == "sinh"  || Name == "sinhf"  || Name == "sinhl" ||
         Name == "cosh"  || Name == "coshf"  || Name == "coshl" ||
         Name == "tanh"  || Name == "tanhf"  || Name == "tanhl" ||
-        Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
+        Name == "sqrt"  || Name == "sqrtf"  || Name == "sqrtl" ||
+        Name == "exp10"  || Name == "exp10l"  || Name == "exp10f")
       return false;
     // clang-format on
     // These are all likely to be optimized into something smaller.
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index c4586894e3e490b..71ad3a35eb3f5e0 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -1328,14 +1328,17 @@ TLI_DEFINE_VECFUNC("llvm.log2.f64", "amd_vrd2_log2", FIXED(2), NOMASK, "_ZGV_LLV
 TLI_DEFINE_VECFUNC("llvm.log2.f64", "amd_vrd4_log2", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("llvm.log2.f64", "amd_vrd8_log2", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
 
+TLI_DEFINE_VECFUNC("log10", "amd_vrd2_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC("log10f", "amd_vrs16_log10f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
 TLI_DEFINE_VECFUNC("log10f", "amd_vrs8_log10f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
 TLI_DEFINE_VECFUNC("log10f", "amd_vrs4_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
 
+TLI_DEFINE_VECFUNC("__log10_finite", "amd_vrd2_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC("__log10f_finite", "amd_vrs16_log10f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
 TLI_DEFINE_VECFUNC("__log10f_finite", "amd_vrs8_log10f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
 TLI_DEFINE_VECFUNC("__log10f_finite", "amd_vrs4_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
 
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "amd_vrd2_log10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC("llvm.log10.f32", "amd_vrs16_log10f", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
 TLI_DEFINE_VECFUNC("llvm.log10.f32", "amd_vrs8_log10f", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
 TLI_DEFINE_VECFUNC("llvm.log10.f32", "amd_vrs4_log10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
@@ -1350,6 +1353,12 @@ TLI_DEFINE_VECFUNC("erf", "amd_vrd8_erf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
 TLI_DEFINE_VECFUNC("exp10", "amd_vrd2_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC("exp10f", "amd_vrs4_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
 
+TLI_DEFINE_VECFUNC("__exp10_finite", "amd_vrd2_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("__exp10f_finite", "amd_vrs4_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("llvm.exp10.f64", "amd_vrd2_exp10", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.exp10.f32", "amd_vrs4_exp10f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+
 TLI_DEFINE_VECFUNC("expm1", "amd_vrd2_expm1", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC("expm1f", "amd_vrs4_expm1f", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
 
@@ -1380,10 +1389,19 @@ TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LL
 TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
 TLI_DEFINE_VECFUNC("llvm.asin.f32", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
 
+TLI_DEFINE_VECFUNC("__asin_finite", "amd_vrd8_asin", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("__asinf_finite", "amd_vrs4_asinf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("__asinf_finite", "amd_vrs8_asinf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("__asinf_finite", "amd_vrs16_asinf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
+
 TLI_DEFINE_VECFUNC("acosf", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("acosf", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
 TLI_DEFINE_VECFUNC("acosf", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
 
+TLI_DEFINE_VECFUNC("__acosf_finite", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("__acosf_finite", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
+TLI_DEFINE_VECFUNC("__acosf_finite", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
+
 TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs16_acosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16v")
 TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs8_acosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8v")
 TLI_DEFINE_VECFUNC("llvm.acos.f32", "amd_vrs4_acosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
@@ -1421,6 +1439,12 @@ TLI_DEFINE_VECFUNC("llvm.tanh.f32", "amd_vrs16_tanhf", FIXED(16), NOMASK, "_ZGV_
 TLI_DEFINE_VECFUNC("cbrt", "amd_vrd2_cbrt", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC("cbrtf", "amd_vrs4_cbrtf", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
 
+TLI_DEFINE_VECFUNC("sincos", "amd_vrd4_sincos", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl8l8")
+TLI_DEFINE_VECFUNC("sincos", "amd_vrd8_sincos", FIXED(8), NOMASK, "_ZGV_LLVM_N8vl8l8")
+
+TLI_DEFINE_VECFUNC("sincosf", "amd_vrs4_sincosf", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
+TLI_DEFINE_VECFUNC("sincosf", "amd_vrs8_sincosf", FIXED(8), NOMASK, "_ZGV_LLVM_N8vl4l4")
+TLI_DEFINE_VECFUNC("sincosf", "amd_vrs16_sincosf", FIXED(16), NOMASK, "_ZGV_LLVM_N16vl4l4")
 #else
 #error "Must choose which vector library functions are to be defined."
 #endif
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def
index d55947fc5103ac5..0cbbbe823c06b50 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -728,7 +728,7 @@ HANDLE_DW_OP(0x24, shl, 0, 2, 2, DWARF)
 HANDLE_DW_OP(0x25, shr, 0, 2, 2, DWARF)
 HANDLE_DW_OP(0x26, shra, 0, 2, 2, DWARF)
 HANDLE_DW_OP(0x27, xor, 0, 2, 2, DWARF)
-HANDLE_DW_OP(0x28, bra, 1, 0, 2, DWARF)
+HANDLE_DW_OP(0x28, bra, 1, 1, 2, DWARF)
 HANDLE_DW_OP(0x29, eq, 0, 2, 2, DWARF)
 HANDLE_DW_OP(0x2a, ge, 0, 2, 2, DWARF)
 HANDLE_DW_OP(0x2b, gt, 0, 2, 2, DWARF)
@@ -1238,6 +1238,7 @@ HANDLE_DW_CFA(0x16, val_expression)
 // Vendor extensions:
 HANDLE_DW_CFA_PRED(0x1d, MIPS_advance_loc8, SELECT_MIPS64)
 HANDLE_DW_CFA_PRED(0x2d, GNU_window_save, SELECT_SPARC)
+HANDLE_DW_CFA_PRED(0x2c, AARCH64_negate_ra_state_with_pc, SELECT_AARCH64)
 HANDLE_DW_CFA_PRED(0x2d, AARCH64_negate_ra_state, SELECT_AARCH64)
 HANDLE_DW_CFA_PRED(0x2e, GNU_args_size, SELECT_X86)
 // Heterogeneous Debugging Extension defined at
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 08574cc356e5144..41a6447356c23b6 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -768,7 +768,7 @@ enum AttributeKindCodes {
   ATTR_KIND_INITIALIZES = 94,
   ATTR_KIND_HYBRID_PATCHABLE = 95,
   ATTR_KIND_SANITIZE_REALTIME = 96,
-  ATTR_KIND_SANITIZE_REALTIME_UNSAFE = 97,
+  ATTR_KIND_SANITIZE_REALTIME_BLOCKING = 97,
   ATTR_KIND_CORO_ELIDE_SAFE = 98,
   ATTR_KIND_NO_EXT = 99,
   ATTR_KIND_NO_DIVERGENCE_SOURCE = 100,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index db3b5cddd7c1c3c..b0316e67654dbc5 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1986,6 +1986,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::cos:
       ISD = ISD::FCOS;
       break;
+    case Intrinsic::sincos:
+      ISD = ISD::FSINCOS;
+      break;
     case Intrinsic::tan:
       ISD = ISD::FTAN;
       break;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
index 7b42722ca8d4f10..b4ff4cd178d7575 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
@@ -24,6 +24,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGenTypes/LowLevelType.h"
 #include "llvm/IR/Function.h"
+#include "llvm/Transforms/Utils/SizeOpts.h"
 #include <bitset>
 #include <cstddef>
 #include <cstdint>
@@ -635,8 +636,12 @@ class GIMatchTableExecutor {
 
   bool shouldOptForSize(const MachineFunction *MF) const {
     const auto &F = MF->getFunction();
-    return F.hasOptSize() || F.hasMinSize() ||
-           (PSI && BFI && CurMBB && llvm::shouldOptForSize(*CurMBB, PSI, BFI));
+    if (F.hasOptSize())
+      return true;
+    if (CurMBB)
+      if (auto *BB = CurMBB->getBasicBlock())
+        return llvm::shouldOptimizeForSize(BB, PSI, BFI);
+    return false;
   }
 
 public:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index bcd44abb2088a04..6d71c150c8da6b8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -998,8 +998,7 @@ class LegalizeRuleSet {
         LegalizeAction::WidenScalar,
         [=](const LegalityQuery &Query) {
           const LLT VecTy = Query.Types[TypeIdx];
-          return VecTy.isVector() && !VecTy.isScalable() &&
-                 VecTy.getSizeInBits() < VectorSize;
+          return VecTy.isFixedVector() && VecTy.getSizeInBits() < VectorSize;
         },
         [=](const LegalityQuery &Query) {
           const LLT VecTy = Query.Types[TypeIdx];
@@ -1172,7 +1171,7 @@ class LegalizeRuleSet {
         LegalizeAction::MoreElements,
         [=](const LegalityQuery &Query) {
           LLT VecTy = Query.Types[TypeIdx];
-          return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+          return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
                  VecTy.getNumElements() < MinElements;
         },
         [=](const LegalityQuery &Query) {
@@ -1190,7 +1189,7 @@ class LegalizeRuleSet {
         LegalizeAction::MoreElements,
         [=](const LegalityQuery &Query) {
           LLT VecTy = Query.Types[TypeIdx];
-          return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+          return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
                  (VecTy.getNumElements() % NumElts != 0);
         },
         [=](const LegalityQuery &Query) {
@@ -1210,7 +1209,7 @@ class LegalizeRuleSet {
         LegalizeAction::FewerElements,
         [=](const LegalityQuery &Query) {
           LLT VecTy = Query.Types[TypeIdx];
-          return VecTy.isVector() && VecTy.getElementType() == EltTy &&
+          return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
                  VecTy.getNumElements() > MaxElements;
         },
         [=](const LegalityQuery &Query) {
@@ -1231,6 +1230,9 @@ class LegalizeRuleSet {
     assert(MinTy.getElementType() == MaxTy.getElementType() &&
            "Expected element types to agree");
 
+    assert((!MinTy.isScalableVector() && !MaxTy.isScalableVector()) &&
+           "Unexpected scalable vectors");
+
     const LLT EltTy = MinTy.getElementType();
     return clampMinNumElements(TypeIdx, EltTy, MinTy.getNumElements())
         .clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements());
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 9b993482c8cc072..ab3025e4923cd0c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -2009,6 +2009,13 @@ class MachineIRBuilder {
     return buildInstr(TargetOpcode::G_FFREXP, {Fract, Exp}, {Src}, Flags);
   }
 
+  /// Build and insert \p Sin, \p Cos = G_FSINCOS \p Src
+  MachineInstrBuilder
+  buildFSincos(const DstOp &Sin, const DstOp &Cos, const SrcOp &Src,
+               std::optional<unsigned> Flags = std::nullopt) {
+    return buildInstr(TargetOpcode::G_FSINCOS, {Sin, Cos}, {Src}, Flags);
+  }
+
   /// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
   MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
                                      const SrcOp &Src1) {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 95a8234d3c60808..4016247376c4f65 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -542,10 +542,6 @@ bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector,
 /// TargetBooleanContents.
 int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
 
-/// Returns true if the given block should be optimized for size.
-bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI,
-                      BlockFrequencyInfo *BFI);
-
 using SmallInstListTy = GISelWorkList<4>;
 void saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
                       LostDebugLocObserver *LocObserver,
diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
index 84050bf17073776..486392ca3c49d5f 100644
--- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h
+++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
@@ -118,6 +118,16 @@ class LiveRegMatrix {
   /// the segment [Start, End).
   bool checkInterference(SlotIndex Start, SlotIndex End, MCRegister PhysReg);
 
+  /// Check for interference in the segment [Start, End) that may prevent
+  /// assignment to \p PhysReg, like checkInterference. Returns a lane mask of
+  /// the lanes of \p PhysReg that interfere, within [Start, End), with some
+  /// other interval already assigned to \p PhysReg.
+  ///
+  /// If this function returns LaneBitmask::getNone(), \p PhysReg is completely
+  /// free at the segment [Start, End).
+  LaneBitmask checkInterferenceLanes(SlotIndex Start, SlotIndex End,
+                                     MCRegister PhysReg);
+
   /// Assign VirtReg to PhysReg.
   /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and
   /// update VirtRegMap. The live range is expected to be available in PhysReg.
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 76a7b8662bae66c..360517324746341 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -1764,8 +1764,8 @@ class MachineInstr
   bool isDereferenceableInvariantLoad() const;
 
   /// If the specified instruction is a PHI that always merges together the
-  /// same virtual register, return the register, otherwise return 0.
-  unsigned isConstantValuePHI() const;
+  /// same virtual register, return the register, otherwise return Register().
+  Register isConstantValuePHI() const;
 
   /// Return true if this instruction has side effects that are not modeled
   /// by mayLoad / mayStore, etc.
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index e12c1f076f133c9..d1c71fc95818c80 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -261,11 +261,11 @@ namespace llvm {
 
   /// TailDuplicate - Duplicate blocks with unconditional branches
   /// into tails of their predecessors.
-  extern char &TailDuplicateID;
+  extern char &TailDuplicateLegacyID;
 
   /// Duplicate blocks with unconditional branches into tails of their
   /// predecessors. Variant that works before register allocation.
-  extern char &EarlyTailDuplicateID;
+  extern char &EarlyTailDuplicateLegacyID;
 
   /// MachineTraceMetrics - This pass computes critical path and CPU resource
   /// usage in an ensemble of traces.
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index bda0120a2df4aa2..26488413fe5826e 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -378,36 +378,48 @@ template<> struct simplify_type<SDUse> {
 /// the backend.
 struct SDNodeFlags {
 private:
-  bool NoUnsignedWrap : 1;
-  bool NoSignedWrap : 1;
-  bool Exact : 1;
-  bool Disjoint : 1;
-  bool NonNeg : 1;
-  bool NoNaNs : 1;
-  bool NoInfs : 1;
-  bool NoSignedZeros : 1;
-  bool AllowReciprocal : 1;
-  bool AllowContract : 1;
-  bool ApproximateFuncs : 1;
-  bool AllowReassociation : 1;
-
-  // We assume instructions do not raise floating-point exceptions by default,
-  // and only those marked explicitly may do so.  We could choose to represent
-  // this via a positive "FPExcept" flags like on the MI level, but having a
-  // negative "NoFPExcept" flag here makes the flag intersection logic more
-  // straightforward.
-  bool NoFPExcept : 1;
-  // Instructions with attached 'unpredictable' metadata on IR level.
-  bool Unpredictable : 1;
+  friend class SDNode;
+
+  unsigned Flags = 0;
+
+  template <unsigned Flag> void setFlag(bool B) {
+    Flags = (Flags & ~Flag) | (B ? Flag : 0);
+  }
 
 public:
+  enum : unsigned {
+    None = 0,
+    NoUnsignedWrap = 1 << 0,
+    NoSignedWrap = 1 << 1,
+    Exact = 1 << 2,
+    Disjoint = 1 << 3,
+    NonNeg = 1 << 4,
+    NoNaNs = 1 << 5,
+    NoInfs = 1 << 6,
+    NoSignedZeros = 1 << 7,
+    AllowReciprocal = 1 << 8,
+    AllowContract = 1 << 9,
+    ApproximateFuncs = 1 << 10,
+    AllowReassociation = 1 << 11,
+
+    // We assume instructions do not raise floating-point exceptions by default,
+    // and only those marked explicitly may do so.  We could choose to represent
+    // this via a positive "FPExcept" flags like on the MI level, but having a
+    // negative "NoFPExcept" flag here makes the flag intersection logic more
+    // straightforward.
+    NoFPExcept = 1 << 12,
+    // Instructions with attached 'unpredictable' metadata on IR level.
+    Unpredictable = 1 << 13,
+
+    // NOTE: Please update LargestValue in LLVM_DECLARE_ENUM_AS_BITMASK below
+    // the class definition when adding new flags.
+
+    PoisonGeneratingFlags = NoUnsignedWrap | NoSignedWrap | Exact | Disjoint |
+                            NonNeg | NoNaNs | NoInfs,
+  };
+
   /// Default constructor turns off all optimization flags.
-  SDNodeFlags()
-      : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false),
-        Disjoint(false), NonNeg(false), NoNaNs(false), NoInfs(false),
-        NoSignedZeros(false), AllowReciprocal(false), AllowContract(false),
-        ApproximateFuncs(false), AllowReassociation(false), NoFPExcept(false),
-        Unpredictable(false) {}
+  SDNodeFlags() : Flags(0) {}
 
   /// Propagate the fast-math-flags from an IR FPMathOperator.
   void copyFMF(const FPMathOperator &FPMO) {
@@ -421,71 +433,49 @@ struct SDNodeFlags {
   }
 
   // These are mutators for each flag.
-  void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
-  void setNoSignedWrap(bool b) { NoSignedWrap = b; }
-  void setExact(bool b) { Exact = b; }
-  void setDisjoint(bool b) { Disjoint = b; }
-  void setNonNeg(bool b) { NonNeg = b; }
-  void setNoNaNs(bool b) { NoNaNs = b; }
-  void setNoInfs(bool b) { NoInfs = b; }
-  void setNoSignedZeros(bool b) { NoSignedZeros = b; }
-  void setAllowReciprocal(bool b) { AllowReciprocal = b; }
-  void setAllowContract(bool b) { AllowContract = b; }
-  void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
-  void setAllowReassociation(bool b) { AllowReassociation = b; }
-  void setNoFPExcept(bool b) { NoFPExcept = b; }
-  void setUnpredictable(bool b) { Unpredictable = b; }
+  void setNoUnsignedWrap(bool b) { setFlag<NoUnsignedWrap>(b); }
+  void setNoSignedWrap(bool b) { setFlag<NoSignedWrap>(b); }
+  void setExact(bool b) { setFlag<Exact>(b); }
+  void setDisjoint(bool b) { setFlag<Disjoint>(b); }
+  void setNonNeg(bool b) { setFlag<NonNeg>(b); }
+  void setNoNaNs(bool b) { setFlag<NoNaNs>(b); }
+  void setNoInfs(bool b) { setFlag<NoInfs>(b); }
+  void setNoSignedZeros(bool b) { setFlag<NoSignedZeros>(b); }
+  void setAllowReciprocal(bool b) { setFlag<AllowReciprocal>(b); }
+  void setAllowContract(bool b) { setFlag<AllowContract>(b); }
+  void setApproximateFuncs(bool b) { setFlag<ApproximateFuncs>(b); }
+  void setAllowReassociation(bool b) { setFlag<AllowReassociation>(b); }
+  void setNoFPExcept(bool b) { setFlag<NoFPExcept>(b); }
+  void setUnpredictable(bool b) { setFlag<Unpredictable>(b); }
 
   // These are accessors for each flag.
-  bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
-  bool hasNoSignedWrap() const { return NoSignedWrap; }
-  bool hasExact() const { return Exact; }
-  bool hasDisjoint() const { return Disjoint; }
-  bool hasNonNeg() const { return NonNeg; }
-  bool hasNoNaNs() const { return NoNaNs; }
-  bool hasNoInfs() const { return NoInfs; }
-  bool hasNoSignedZeros() const { return NoSignedZeros; }
-  bool hasAllowReciprocal() const { return AllowReciprocal; }
-  bool hasAllowContract() const { return AllowContract; }
-  bool hasApproximateFuncs() const { return ApproximateFuncs; }
-  bool hasAllowReassociation() const { return AllowReassociation; }
-  bool hasNoFPExcept() const { return NoFPExcept; }
-  bool hasUnpredictable() const { return Unpredictable; }
+  bool hasNoUnsignedWrap() const { return Flags & NoUnsignedWrap; }
+  bool hasNoSignedWrap() const { return Flags & NoSignedWrap; }
+  bool hasExact() const { return Flags & Exact; }
+  bool hasDisjoint() const { return Flags & Disjoint; }
+  bool hasNonNeg() const { return Flags & NonNeg; }
+  bool hasNoNaNs() const { return Flags & NoNaNs; }
+  bool hasNoInfs() const { return Flags & NoInfs; }
+  bool hasNoSignedZeros() const { return Flags & NoSignedZeros; }
+  bool hasAllowReciprocal() const { return Flags & AllowReciprocal; }
+  bool hasAllowContract() const { return Flags & AllowContract; }
+  bool hasApproximateFuncs() const { return Flags & ApproximateFuncs; }
+  bool hasAllowReassociation() const { return Flags & AllowReassociation; }
+  bool hasNoFPExcept() const { return Flags & NoFPExcept; }
+  bool hasUnpredictable() const { return Flags & Unpredictable; }
 
   bool operator==(const SDNodeFlags &Other) const {
-    return NoUnsignedWrap == Other.NoUnsignedWrap &&
-           NoSignedWrap == Other.NoSignedWrap && Exact == Other.Exact &&
-           Disjoint == Other.Disjoint && NonNeg == Other.NonNeg &&
-           NoNaNs == Other.NoNaNs && NoInfs == Other.NoInfs &&
-           NoSignedZeros == Other.NoSignedZeros &&
-           AllowReciprocal == Other.AllowReciprocal &&
-           AllowContract == Other.AllowContract &&
-           ApproximateFuncs == Other.ApproximateFuncs &&
-           AllowReassociation == Other.AllowReassociation &&
-           NoFPExcept == Other.NoFPExcept &&
-           Unpredictable == Other.Unpredictable;
+    return Flags == Other.Flags;
   }
 
   /// Clear any flags in this flag set that aren't also set in Flags. All
   /// flags will be cleared if Flags are undefined.
-  void intersectWith(const SDNodeFlags Flags) {
-    NoUnsignedWrap &= Flags.NoUnsignedWrap;
-    NoSignedWrap &= Flags.NoSignedWrap;
-    Exact &= Flags.Exact;
-    Disjoint &= Flags.Disjoint;
-    NonNeg &= Flags.NonNeg;
-    NoNaNs &= Flags.NoNaNs;
-    NoInfs &= Flags.NoInfs;
-    NoSignedZeros &= Flags.NoSignedZeros;
-    AllowReciprocal &= Flags.AllowReciprocal;
-    AllowContract &= Flags.AllowContract;
-    ApproximateFuncs &= Flags.ApproximateFuncs;
-    AllowReassociation &= Flags.AllowReassociation;
-    NoFPExcept &= Flags.NoFPExcept;
-    Unpredictable &= Flags.Unpredictable;
-  }
+  void intersectWith(const SDNodeFlags Flags) { this->Flags &= Flags.Flags; }
 };
 
+LLVM_DECLARE_ENUM_AS_BITMASK(decltype(SDNodeFlags::None),
+                             SDNodeFlags::Unpredictable);
+
 /// Represents one node in the SelectionDAG.
 ///
 class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
@@ -1029,10 +1019,7 @@ END_TWO_BYTE_PACK()
   void intersectFlagsWith(const SDNodeFlags Flags);
 
   bool hasPoisonGeneratingFlags() const {
-    SDNodeFlags Flags = getFlags();
-    return Flags.hasNoUnsignedWrap() || Flags.hasNoSignedWrap() ||
-           Flags.hasExact() || Flags.hasDisjoint() || Flags.hasNonNeg() ||
-           Flags.hasNoNaNs() || Flags.hasNoInfs();
+    return Flags.Flags & SDNodeFlags::PoisonGeneratingFlags;
   }
 
   void setCFIType(uint32_t Type) { CFIType = Type; }
diff --git a/llvm/include/llvm/CodeGen/TailDuplication.h b/llvm/include/llvm/CodeGen/TailDuplication.h
new file mode 100644
index 000000000000000..687a592ccf2fbfe
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/TailDuplication.h
@@ -0,0 +1,61 @@
+//===- llvm/CodeGen/TailDuplication.h ---------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_TAILDUPLICATION_H
+#define LLVM_CODEGEN_TAILDUPLICATION_H
+
+#include "llvm/CodeGen/MBFIWrapper.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachinePassManager.h"
+#include <memory>
+
+namespace llvm {
+
+/// CRTP base shared by the two tail-duplication passes declared below.
+///
+/// \tparam DerivedT concrete pass type, forwarded to PassInfoMixin.
+/// \tparam PreRegAlloc compile-time flag that distinguishes the two variants.
+/// NOTE(review): presumably selects the pre-register-allocation behavior;
+/// confirm against the out-of-line definition of run().
+template <typename DerivedT, bool PreRegAlloc>
+class TailDuplicatePassBase : public PassInfoMixin<DerivedT> {
+private:
+  std::unique_ptr<MBFIWrapper> MBFIW;
+
+public:
+  /// Pass entry point. Only declared here; the explicit instantiation
+  /// declarations at the end of this header cover both variants.
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
+/// Tail-duplication pass instantiated with PreRegAlloc = true.
+class EarlyTailDuplicatePass
+    : public TailDuplicatePassBase<EarlyTailDuplicatePass, true> {
+public:
+  /// Returns a default-constructed property set with NoPHIs set.
+  MachineFunctionProperties getClearedProperties() const {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoPHIs);
+  }
+};
+
+/// Tail-duplication pass instantiated with PreRegAlloc = false.
+class TailDuplicatePass
+    : public TailDuplicatePassBase<TailDuplicatePass, false> {};
+
+} // namespace llvm
+
+// Explicit instantiation declarations for both variants; these suppress
+// implicit instantiation of the template's members in including files.
+extern template class llvm::TailDuplicatePassBase<llvm::EarlyTailDuplicatePass,
+                                                  true>;
+extern template class llvm::TailDuplicatePassBase<llvm::TailDuplicatePass,
+                                                  false>;
+
+#endif // LLVM_CODEGEN_TAILDUPLICATION_H
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h
index 3db6f33a8093f06..4de109739227ad2 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.h
+++ b/llvm/include/llvm/CodeGen/ValueTypes.h
@@ -230,7 +230,8 @@ namespace llvm {
 
     /// Return true if this is an overloaded type for TableGen.
     bool isOverloaded() const {
-      return (V==MVT::iAny || V==MVT::fAny || V==MVT::vAny || V==MVT::iPTRAny);
+      return (V == MVT::iAny || V == MVT::fAny || V == MVT::vAny ||
+              V == MVT::pAny);
     }
 
     /// Return true if the bit size is a multiple of 8.
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 493c0cfcab60ce4..6d6b92958b43218 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -338,9 +338,9 @@ def MetadataVT : ValueType<0, 505> { // Metadata
   let LLVMName = "Metadata";
 }
 
-// Pseudo valuetype mapped to the current pointer size to any address space.
+// Pseudo valuetype to represent "pointer to any address space"
 // Should only be used in TableGen.
-def iPTRAny    : VTAny<506>;
+def pAny       : VTAny<506>;
 
 // Pseudo valuetype to represent "vector of any size"
 // Should only be used in TableGen.
diff --git a/llvm/include/llvm/CodeGenTypes/MachineValueType.h b/llvm/include/llvm/CodeGenTypes/MachineValueType.h
index c9a5098ef1623ed..5c47ad4824a7911 100644
--- a/llvm/include/llvm/CodeGenTypes/MachineValueType.h
+++ b/llvm/include/llvm/CodeGenTypes/MachineValueType.h
@@ -320,7 +320,7 @@ namespace llvm {
         llvm_unreachable("Value type is non-standard value, Other.");
       case iPTR:
         llvm_unreachable("Value type size is target-dependent. Ask TLI.");
-      case iPTRAny:
+      case pAny:
       case iAny:
       case fAny:
       case vAny:
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h
index b8432c4d26c68c0..bb8da0ab9db27a5 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO.h
@@ -14,6 +14,7 @@
 #define LLVM_EXECUTIONENGINE_JITLINK_MACHO_H
 
 #include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h"
 
 namespace llvm {
 namespace jitlink {
@@ -33,6 +34,26 @@ createLinkGraphFromMachOObject(MemoryBufferRef ObjectBuffer);
 void link_MachO(std::unique_ptr<LinkGraph> G,
                 std::unique_ptr<JITLinkContext> Ctx);
 
+/// Get a reference to the standard MachO data section (creates an empty
+/// section with RW- permissions and standard lifetime if one does not
+/// already exist).
+inline Section &getMachODefaultRWDataSection(LinkGraph &G) {
+  if (auto *DataSec = G.findSectionByName(orc::MachODataDataSectionName))
+    return *DataSec;
+  return G.createSection(orc::MachODataDataSectionName,
+                         orc::MemProt::Read | orc::MemProt::Write);
+}
+
+/// Get a reference to the standard MachO text section (creates an empty
+/// section with R-X permissions and standard lifetime if one does not
+/// already exist).
+inline Section &getMachODefaultTextSection(LinkGraph &G) {
+  if (auto *TextSec = G.findSectionByName(orc::MachOTextTextSectionName))
+    return *TextSec;
+  return G.createSection(orc::MachOTextTextSectionName,
+                         orc::MemProt::Read | orc::MemProt::Exec);
+}
+
 } // end namespace jitlink
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h
index 52f284c89bdade5..ef42cc5f798fd93 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h
@@ -26,12 +26,21 @@ class JITLinkRedirectableSymbolManager : public RedirectableSymbolManager,
   /// Create redirection manager that uses JITLink based implementaion.
+  ///
+  /// Fails with an "Architecture not supported" error when JITLink provides no
+  /// anonymous-pointer or pointer-jump-stub creator for the session's triple.
   static Expected<std::unique_ptr<RedirectableSymbolManager>>
   Create(ObjectLinkingLayer &ObjLinkingLayer, JITDylib &JD) {
-    Error Err = Error::success();
-    auto RM = std::unique_ptr<RedirectableSymbolManager>(
-        new JITLinkRedirectableSymbolManager(ObjLinkingLayer, JD, Err));
-    if (Err)
-      return Err;
-    return std::move(RM);
+    auto AnonymousPtrCreator(jitlink::getAnonymousPointerCreator(
+        ObjLinkingLayer.getExecutionSession().getTargetTriple()));
+    auto PtrJumpStubCreator(jitlink::getPointerJumpStubCreator(
+        ObjLinkingLayer.getExecutionSession().getTargetTriple()));
+    if (!AnonymousPtrCreator || !PtrJumpStubCreator)
+      return make_error<StringError>("Architecture not supported",
+                                     inconvertibleErrorCode());
+    // The creators are handed over to the manager, which stores them.
+    return std::unique_ptr<RedirectableSymbolManager>(
+        new JITLinkRedirectableSymbolManager(
+            ObjLinkingLayer, JD, std::move(AnonymousPtrCreator),
+            std::move(PtrJumpStubCreator)));
   }
 
   void emitRedirectableSymbols(std::unique_ptr<MaterializationResponsibility> R,
@@ -52,18 +61,15 @@ class JITLinkRedirectableSymbolManager : public RedirectableSymbolManager,
   constexpr static StringRef JumpStubTableName = "$IND_JUMP_";
   constexpr static StringRef StubPtrTableName = "$__IND_JUMP_PTRS";
 
-  JITLinkRedirectableSymbolManager(ObjectLinkingLayer &ObjLinkingLayer,
-                                   JITDylib &JD, Error &Err)
+  /// Takes ownership of both creators (sink parameters) and registers this
+  /// object as a resource manager with the layer's execution session.
+  JITLinkRedirectableSymbolManager(
+      ObjectLinkingLayer &ObjLinkingLayer, JITDylib &JD,
+      jitlink::AnonymousPointerCreator AnonymousPtrCreator,
+      jitlink::PointerJumpStubCreator PtrJumpStubCreator)
       : ObjLinkingLayer(ObjLinkingLayer), JD(JD),
-        AnonymousPtrCreator(jitlink::getAnonymousPointerCreator(
-            ObjLinkingLayer.getExecutionSession().getTargetTriple())),
-        PtrJumpStubCreator(jitlink::getPointerJumpStubCreator(
-            ObjLinkingLayer.getExecutionSession().getTargetTriple())) {
-    if (!AnonymousPtrCreator || !PtrJumpStubCreator)
-      Err = make_error<StringError>("Architecture not supported",
-                                    inconvertibleErrorCode());
-    if (Err)
-      return;
+        AnonymousPtrCreator(std::move(AnonymousPtrCreator)),
+        PtrJumpStubCreator(std::move(PtrJumpStubCreator)) {
     ObjLinkingLayer.getExecutionSession().registerResourceManager(*this);
   }
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h b/llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h
index 4004c42d9146843..f2ea1f5b64c5332 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/RedirectionManager.h
@@ -32,8 +32,8 @@ class RedirectionManager {
 
   /// Change the redirection destination of given symbol to new destination
   /// symbol.
-  virtual Error redirect(JITDylib &JD, SymbolStringPtr Symbol,
-                         ExecutorSymbolDef NewDest) {
+  Error redirect(JITDylib &JD, SymbolStringPtr Symbol,
+                 ExecutorSymbolDef NewDest) {
     return redirect(JD, {{Symbol, NewDest}});
   }
 
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h
index f886203f8e3fb57..b927dfbce992a0d 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/MachOObjectFormat.h
@@ -49,6 +49,7 @@ extern StringRef MachOSwift5TypesSectionName;
 extern StringRef MachOSwift5TypeRefSectionName;
 extern StringRef MachOSwift5FieldMetadataSectionName;
 extern StringRef MachOSwift5EntrySectionName;
+extern StringRef MachOTextTextSectionName;
 extern StringRef MachOThreadBSSSectionName;
 extern StringRef MachOThreadDataSectionName;
 extern StringRef MachOThreadVarsSectionName;
diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
index ac34ddafc5e726e..2a890905dc6323e 100644
--- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
+++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h
@@ -503,7 +503,7 @@ struct DependT {
   using LocatorList = ObjectListT<I, E>;
   using TaskDependenceType = tomp::type::TaskDependenceType;
 
-  struct WithLocators { // Modern form
+  struct DepType { // The form with task dependence type.
     using TupleTrait = std::true_type;
     // Empty LocatorList means "omp_all_memory".
     std::tuple<TaskDependenceType, OPT(Iterator), LocatorList> t;
@@ -511,7 +511,7 @@ struct DependT {
 
   using Doacross = DoacrossT<T, I, E>;
   using UnionTrait = std::true_type;
-  std::variant<Doacross, WithLocators> u; // Doacross form is legacy
+  std::variant<Doacross, DepType> u; // Doacross form is legacy
 };
 
 // V5.2: [3.5] `destroy` clause
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 1834ad4d037f3d9..70179bab4757790 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -185,10 +185,10 @@ def OMPC_Full: Clause<"full"> {
   let clangClass = "OMPFullClause";
 }
 def OMP_GRAINSIZE_Strict : ClauseVal<"strict", 1, 1> {}
-def OMP_GRAINSIZE_Unknown : ClauseVal<"unkonwn", 2, 0> { let isDefault = 1; }
+def OMP_GRAINSIZE_Unknown : ClauseVal<"unknown", 2, 0> { let isDefault = 1; }
 def OMPC_GrainSize : Clause<"grainsize"> {
   let clangClass = "OMPGrainsizeClause";
-  let flangClass = "ScalarIntExpr";
+  let flangClass = "OmpGrainsizeClause";
   let enumClauseValue = "GrainsizeType";
   let allowedClauseValues = [
     OMP_GRAINSIZE_Strict,
@@ -301,10 +301,10 @@ def OMPC_NoWait : Clause<"nowait"> {
   let clangClass = "OMPNowaitClause";
 }
 def OMP_NUMTASKS_Strict : ClauseVal<"strict", 1, 1> {}
-def OMP_NUMTASKS_Unknown : ClauseVal<"unkonwn", 2, 0> { let isDefault = 1; }
+def OMP_NUMTASKS_Unknown : ClauseVal<"unknown", 2, 0> { let isDefault = 1; }
 def OMPC_NumTasks : Clause<"num_tasks"> {
   let clangClass = "OMPNumTasksClause";
-  let flangClass = "ScalarIntExpr";
+  let flangClass = "OmpNumTasksClause";
   let enumClauseValue = "NumTasksType";
   let allowedClauseValues = [
     OMP_NUMTASKS_Strict,
@@ -892,7 +892,7 @@ def OMP_Scan : Directive<"scan"> {
   let association = AS_Separating;
   let category = CA_Subsidiary;
 }
-def OMP_scope : Directive<"scope"> {
+def OMP_Scope : Directive<"scope"> {
   let allowedClauses = [
     VersionedClause<OMPC_Private, 51>,
     VersionedClause<OMPC_Reduction, 51>,
@@ -905,6 +905,14 @@ def OMP_scope : Directive<"scope"> {
   let association = AS_Block;
   let category = CA_Executable;
 }
+def OMP_EndScope : Directive<"end scope"> {
+  let allowedOnceClauses = [
+    VersionedClause<OMPC_NoWait>,
+  ];
+  let leafConstructs = OMP_Scope.leafConstructs;
+  let association = OMP_Scope.association;
+  let category = OMP_Scope.category;
+}
 def OMP_Section : Directive<"section"> {
   let association = AS_Separating;
   let category = CA_Subsidiary;
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index c4735ec41e71340..3afb9d84278e81a 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -520,6 +520,9 @@ class OpenMPIRBuilder {
   /// Type used throughout for insertion points.
   using InsertPointTy = IRBuilder<>::InsertPoint;
 
+  /// Type used to represent an insertion point or an error value.
+  using InsertPointOrErrorTy = Expected<InsertPointTy>;
+
   /// Get the create a name using the platform specific separators.
   /// \param Parts parts of the final name that needs separation
   /// The created name has a first separator between the first and second part
@@ -538,7 +541,7 @@ class OpenMPIRBuilder {
   /// A finalize callback knows about all objects that need finalization, e.g.
   /// destruction, when the scope of the currently generated construct is left
   /// at the time, and location, the callback is invoked.
-  using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
+  using FinalizeCallbackTy = std::function<Error(InsertPointTy CodeGenIP)>;
 
   struct FinalizationInfo {
     /// The finalization callback provided by the last in-flight invocation of
@@ -589,15 +592,19 @@ class OpenMPIRBuilder {
   ///                 not be split.
   /// \param CodeGenIP is the insertion point at which the body code should be
   ///                  placed.
+  ///
+  /// \return an error, if any were triggered during execution.
   using BodyGenCallbackTy =
-      function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
+      function_ref<Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
 
   // This is created primarily for sections construct as llvm::function_ref
   // (BodyGenCallbackTy) is not storable (as described in the comments of
   // function_ref class - function_ref contains non-ownable reference
   // to the callable.
+  ///
+  /// \return an error, if any were triggered during execution.
   using StorableBodyGenCallbackTy =
-      std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
+      std::function<Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
 
   /// Callback type for loop body code generation.
   ///
@@ -607,8 +614,10 @@ class OpenMPIRBuilder {
   ///                  terminated with an unconditional branch to the loop
   ///                  latch.
   /// \param IndVar    is the induction variable usable at the insertion point.
+  ///
+  /// \return an error, if any were triggered during execution.
   using LoopBodyGenCallbackTy =
-      function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
+      function_ref<Error(InsertPointTy CodeGenIP, Value *IndVar)>;
 
   /// Callback type for variable privatization (think copy & default
   /// constructor).
@@ -628,7 +637,7 @@ class OpenMPIRBuilder {
   ///
   /// \returns The new insertion point where code generation continues and
   ///          \p ReplVal the replacement value.
-  using PrivatizeCallbackTy = function_ref<InsertPointTy(
+  using PrivatizeCallbackTy = function_ref<InsertPointOrErrorTy(
       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
       Value &Inner, Value *&ReplVal)>;
 
@@ -658,9 +667,10 @@ class OpenMPIRBuilder {
   /// \param ThreadID Optional parameter to pass in any existing ThreadID value.
   ///
   /// \returns The insertion point after the barrier.
-  InsertPointTy createBarrier(const LocationDescription &Loc,
-                              omp::Directive Kind, bool ForceSimpleCall = false,
-                              bool CheckCancelFlag = true);
+  InsertPointOrErrorTy createBarrier(const LocationDescription &Loc,
+                                     omp::Directive Kind,
+                                     bool ForceSimpleCall = false,
+                                     bool CheckCancelFlag = true);
 
   /// Generator for '#omp cancel'
   ///
@@ -669,8 +679,9 @@ class OpenMPIRBuilder {
   /// \param CanceledDirective The kind of directive that is cancled.
   ///
   /// \returns The insertion point after the barrier.
-  InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
-                             omp::Directive CanceledDirective);
+  InsertPointOrErrorTy createCancel(const LocationDescription &Loc,
+                                    Value *IfCondition,
+                                    omp::Directive CanceledDirective);
 
   /// Generator for '#omp parallel'
   ///
@@ -685,7 +696,7 @@ class OpenMPIRBuilder {
   /// \param IsCancellable Flag to indicate a cancellable parallel region.
   ///
   /// \returns The insertion position *after* the parallel.
-  IRBuilder<>::InsertPoint
+  InsertPointOrErrorTy
   createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
                  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
                  FinalizeCallbackTy FiniCB, Value *IfCondition,
@@ -711,10 +722,10 @@ class OpenMPIRBuilder {
   ///
   /// \returns An object representing the created control flow structure which
   ///          can be used for loop-associated directives.
-  CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
-                                         LoopBodyGenCallbackTy BodyGenCB,
-                                         Value *TripCount,
-                                         const Twine &Name = "loop");
+  Expected<CanonicalLoopInfo *>
+  createCanonicalLoop(const LocationDescription &Loc,
+                      LoopBodyGenCallbackTy BodyGenCB, Value *TripCount,
+                      const Twine &Name = "loop");
 
   /// Generator for the control flow structure of an OpenMP canonical loop.
   ///
@@ -764,12 +775,10 @@ class OpenMPIRBuilder {
   ///
   /// \returns An object representing the created control flow structure which
   ///          can be used for loop-associated directives.
-  CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
-                                         LoopBodyGenCallbackTy BodyGenCB,
-                                         Value *Start, Value *Stop, Value *Step,
-                                         bool IsSigned, bool InclusiveStop,
-                                         InsertPointTy ComputeIP = {},
-                                         const Twine &Name = "loop");
+  Expected<CanonicalLoopInfo *> createCanonicalLoop(
+      const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
+      Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
+      InsertPointTy ComputeIP = {}, const Twine &Name = "loop");
 
   /// Collapse a loop nest into a single loop.
   ///
@@ -996,9 +1005,10 @@ class OpenMPIRBuilder {
   ///                     the loop.
   ///
   /// \returns Point where to insert code after the workshare construct.
-  InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
-                                         InsertPointTy AllocaIP,
-                                         bool NeedsBarrier);
+  InsertPointOrErrorTy applyStaticWorkshareLoop(DebugLoc DL,
+                                                CanonicalLoopInfo *CLI,
+                                                InsertPointTy AllocaIP,
+                                                bool NeedsBarrier);
 
   /// Modifies the canonical loop a statically-scheduled workshare loop with a
   /// user-specified chunk size.
@@ -1013,11 +1023,11 @@ class OpenMPIRBuilder {
   /// \param ChunkSize    The user-specified chunk size.
   ///
   /// \returns Point where to insert code after the workshare construct.
-  InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
-                                                CanonicalLoopInfo *CLI,
-                                                InsertPointTy AllocaIP,
-                                                bool NeedsBarrier,
-                                                Value *ChunkSize);
+  InsertPointOrErrorTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
+                                                       CanonicalLoopInfo *CLI,
+                                                       InsertPointTy AllocaIP,
+                                                       bool NeedsBarrier,
+                                                       Value *ChunkSize);
 
   /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
   ///
@@ -1039,11 +1049,12 @@ class OpenMPIRBuilder {
   ///                 scheduling. If \p nullptr, defaults to 1.
   ///
   /// \returns Point where to insert code after the workshare construct.
-  InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
-                                          InsertPointTy AllocaIP,
-                                          omp::OMPScheduleType SchedType,
-                                          bool NeedsBarrier,
-                                          Value *Chunk = nullptr);
+  InsertPointOrErrorTy applyDynamicWorkshareLoop(DebugLoc DL,
+                                                 CanonicalLoopInfo *CLI,
+                                                 InsertPointTy AllocaIP,
+                                                 omp::OMPScheduleType SchedType,
+                                                 bool NeedsBarrier,
+                                                 Value *Chunk = nullptr);
 
   /// Create alternative version of the loop to support if clause
   ///
@@ -1094,7 +1105,7 @@ class OpenMPIRBuilder {
   ///                 It corresponds to type of loop workshare OpenMP pragma.
   ///
   /// \returns Point where to insert code after the workshare construct.
-  InsertPointTy applyWorkshareLoop(
+  InsertPointOrErrorTy applyWorkshareLoop(
       DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
       bool NeedsBarrier,
       llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
@@ -1251,20 +1262,21 @@ class OpenMPIRBuilder {
   ///                    cannot be resumed until execution of the structured
   ///                    block that is associated with the generated task is
   ///                    completed.
-  InsertPointTy createTask(const LocationDescription &Loc,
-                           InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
-                           bool Tied = true, Value *Final = nullptr,
-                           Value *IfCondition = nullptr,
-                           SmallVector<DependData> Dependencies = {});
+  InsertPointOrErrorTy createTask(const LocationDescription &Loc,
+                                  InsertPointTy AllocaIP,
+                                  BodyGenCallbackTy BodyGenCB, bool Tied = true,
+                                  Value *Final = nullptr,
+                                  Value *IfCondition = nullptr,
+                                  SmallVector<DependData> Dependencies = {});
 
   /// Generator for the taskgroup construct
   ///
   /// \param Loc The location where the taskgroup construct was encountered.
   /// \param AllocaIP The insertion point to be used for alloca instructions.
   /// \param BodyGenCB Callback that will generate the region code.
-  InsertPointTy createTaskgroup(const LocationDescription &Loc,
-                                InsertPointTy AllocaIP,
-                                BodyGenCallbackTy BodyGenCB);
+  InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc,
+                                       InsertPointTy AllocaIP,
+                                       BodyGenCallbackTy BodyGenCB);
 
   using FileIdentifierInfoCallbackTy =
       std::function<std::tuple<std::string, uint64_t>()>;
@@ -1302,15 +1314,15 @@ class OpenMPIRBuilder {
   /// \param CodeGenIP InsertPoint for CodeGen.
   /// \param LHS Pass in the LHS Value to be used for CodeGen.
   /// \param RHS Pass in the RHS Value to be used for CodeGen.
-  using ReductionGenCBTy = std::function<InsertPointTy(
+  using ReductionGenCBTy = std::function<InsertPointOrErrorTy(
       InsertPointTy CodeGenIP, Value *LHS, Value *RHS, Value *&Res)>;
 
   /// Functions used to generate atomic reductions. Such functions take two
   /// Values representing pointers to LHS and RHS of the reduction, as well as
   /// the element type of these pointers. They are expected to atomically
   /// update the LHS to the reduced value.
-  using ReductionGenAtomicCBTy =
-      std::function<InsertPointTy(InsertPointTy, Type *, Value *, Value *)>;
+  using ReductionGenAtomicCBTy = std::function<InsertPointOrErrorTy(
+      InsertPointTy, Type *, Value *, Value *)>;
 
   /// Enum class for reduction evaluation types scalar, complex and aggregate.
   enum class EvalKind { Scalar, Complex, Aggregate };
@@ -1510,9 +1522,10 @@ class OpenMPIRBuilder {
   ///                  need to be copied to the new function.
   ///
   /// \return The InterWarpCopy function.
-  Function *emitInterWarpCopyFunction(const LocationDescription &Loc,
-                                      ArrayRef<ReductionInfo> ReductionInfos,
-                                      AttributeList FuncAttrs);
+  Expected<Function *>
+  emitInterWarpCopyFunction(const LocationDescription &Loc,
+                            ArrayRef<ReductionInfo> ReductionInfos,
+                            AttributeList FuncAttrs);
 
   /// This function emits a helper that copies all the reduction variables from
   /// the team into the provided global buffer for the reduction variables.
@@ -1604,7 +1617,7 @@ class OpenMPIRBuilder {
   ///                  need to be copied to the new function.
   ///
   /// \return The reduction function.
-  Function *createReductionFunction(
+  Expected<Function *> createReductionFunction(
       StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
       ReductionGenCBKind ReductionGenCBKind = ReductionGenCBKind::MLIR,
       AttributeList FuncAttrs = {});
@@ -1871,7 +1884,7 @@ class OpenMPIRBuilder {
   /// \param ReductionBufNum    Optional OpenMPCUDAReductionBufNumValue to be
   /// used for teams reduction.
   /// \param SrcLocInfo         Source location information global.
-  InsertPointTy createReductionsGPU(
+  InsertPointOrErrorTy createReductionsGPU(
       const LocationDescription &Loc, InsertPointTy AllocaIP,
       InsertPointTy CodeGenIP, ArrayRef<ReductionInfo> ReductionInfos,
       bool IsNoWait = false, bool IsTeamsReduction = false,
@@ -1943,10 +1956,11 @@ class OpenMPIRBuilder {
   /// \param IsNoWait           A flag set if the reduction is marked as nowait.
   /// \param IsByRef            A flag set if the reduction is using reference
   /// or direct value.
-  InsertPointTy createReductions(const LocationDescription &Loc,
-                                 InsertPointTy AllocaIP,
-                                 ArrayRef<ReductionInfo> ReductionInfos,
-                                 ArrayRef<bool> IsByRef, bool IsNoWait = false);
+  InsertPointOrErrorTy createReductions(const LocationDescription &Loc,
+                                        InsertPointTy AllocaIP,
+                                        ArrayRef<ReductionInfo> ReductionInfos,
+                                        ArrayRef<bool> IsByRef,
+                                        bool IsNoWait = false);
 
   ///}
 
@@ -2002,9 +2016,11 @@ class OpenMPIRBuilder {
   /// \param CancelFlag Flag indicating if the cancellation is performed.
   /// \param CanceledDirective The kind of directive that is cancled.
   /// \param ExitCB Extra code to be generated in the exit block.
-  void emitCancelationCheckImpl(Value *CancelFlag,
-                                omp::Directive CanceledDirective,
-                                FinalizeCallbackTy ExitCB = {});
+  ///
+  /// \return an error, if any were triggered during execution.
+  Error emitCancelationCheckImpl(Value *CancelFlag,
+                                 omp::Directive CanceledDirective,
+                                 FinalizeCallbackTy ExitCB = {});
 
   /// Generate a target region entry call.
   ///
@@ -2135,8 +2151,10 @@ class OpenMPIRBuilder {
   /// } else {
   ///   ElseGen();
   /// }
-  void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
-                    BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});
+  ///
+  /// \return an error, if any were triggered during execution.
+  Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
+                     BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});
 
   /// Create the global variable holding the offload mappings information.
   GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
@@ -2340,7 +2358,8 @@ class OpenMPIRBuilder {
   /// is executed when the kernel launch fails. It takes an insertion point as
   /// parameter where the code should be emitted. It returns an insertion point
   /// that points right after after the emitted code.
-  using EmitFallbackCallbackTy = function_ref<InsertPointTy(InsertPointTy)>;
+  using EmitFallbackCallbackTy =
+      function_ref<InsertPointOrErrorTy(InsertPointTy)>;
 
   /// Generate a target region entry call and host fallback call.
   ///
@@ -2352,7 +2371,7 @@ class OpenMPIRBuilder {
   /// \param DeviceID Identifier for the device via the 'device' clause.
   /// \param RTLoc Source location identifier
   /// \param AllocaIP The insertion point to be used for alloca instructions.
-  InsertPointTy
+  InsertPointOrErrorTy
   emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID,
                    EmitFallbackCallbackTy EmitTargetCallFallbackCB,
                    TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc,
@@ -2366,9 +2385,11 @@ class OpenMPIRBuilder {
   /// \param RTLoc Source location identifier
   /// \Param TargetTaskAllocaIP Insertion point for the alloca block of the
   ///        generated task.
+  ///
+  /// \return an error, if any were triggered during execution.
   using TargetTaskBodyCallbackTy =
-      function_ref<void(Value *DeviceID, Value *RTLoc,
-                        IRBuilderBase::InsertPoint TargetTaskAllocaIP)>;
+      function_ref<Error(Value *DeviceID, Value *RTLoc,
+                         IRBuilderBase::InsertPoint TargetTaskAllocaIP)>;
 
   /// Generate a target-task for the target construct
   ///
@@ -2380,7 +2401,7 @@ class OpenMPIRBuilder {
   ///        dependencies as specified by the 'depend' clause.
   /// \param HasNoWait True if the target construct had 'nowait' on it, false
   ///        otherwise
-  InsertPointTy emitTargetTask(
+  InsertPointOrErrorTy emitTargetTask(
       TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc,
       OpenMPIRBuilder::InsertPointTy AllocaIP,
       const SmallVector<llvm::OpenMPIRBuilder::DependData> &Dependencies,
@@ -2478,11 +2499,11 @@ class OpenMPIRBuilder {
   /// \param CPFuncs copy functions to use for each copyprivate variable.
   ///
   /// \returns The insertion position *after* the single call.
-  InsertPointTy createSingle(const LocationDescription &Loc,
-                             BodyGenCallbackTy BodyGenCB,
-                             FinalizeCallbackTy FiniCB, bool IsNowait,
-                             ArrayRef<llvm::Value *> CPVars = {},
-                             ArrayRef<llvm::Function *> CPFuncs = {});
+  InsertPointOrErrorTy createSingle(const LocationDescription &Loc,
+                                    BodyGenCallbackTy BodyGenCB,
+                                    FinalizeCallbackTy FiniCB, bool IsNowait,
+                                    ArrayRef<llvm::Value *> CPVars = {},
+                                    ArrayRef<llvm::Function *> CPFuncs = {});
 
   /// Generator for '#omp master'
   ///
@@ -2491,9 +2512,9 @@ class OpenMPIRBuilder {
   /// \param FiniCB Callback to finalize variable copies.
   ///
   /// \returns The insertion position *after* the master.
-  InsertPointTy createMaster(const LocationDescription &Loc,
-                             BodyGenCallbackTy BodyGenCB,
-                             FinalizeCallbackTy FiniCB);
+  InsertPointOrErrorTy createMaster(const LocationDescription &Loc,
+                                    BodyGenCallbackTy BodyGenCB,
+                                    FinalizeCallbackTy FiniCB);
 
   /// Generator for '#omp masked'
   ///
@@ -2502,9 +2523,9 @@ class OpenMPIRBuilder {
   /// \param FiniCB Callback to finialize variable copies.
   ///
   /// \returns The insertion position *after* the masked.
-  InsertPointTy createMasked(const LocationDescription &Loc,
-                             BodyGenCallbackTy BodyGenCB,
-                             FinalizeCallbackTy FiniCB, Value *Filter);
+  InsertPointOrErrorTy createMasked(const LocationDescription &Loc,
+                                    BodyGenCallbackTy BodyGenCB,
+                                    FinalizeCallbackTy FiniCB, Value *Filter);
 
   /// Generator for '#omp critical'
   ///
@@ -2515,10 +2536,10 @@ class OpenMPIRBuilder {
   /// \param HintInst Hint Instruction for hint clause associated with critical
   ///
   /// \returns The insertion position *after* the critical.
-  InsertPointTy createCritical(const LocationDescription &Loc,
-                               BodyGenCallbackTy BodyGenCB,
-                               FinalizeCallbackTy FiniCB,
-                               StringRef CriticalName, Value *HintInst);
+  InsertPointOrErrorTy createCritical(const LocationDescription &Loc,
+                                      BodyGenCallbackTy BodyGenCB,
+                                      FinalizeCallbackTy FiniCB,
+                                      StringRef CriticalName, Value *HintInst);
 
   /// Generator for '#omp ordered depend (source | sink)'
   ///
@@ -2544,10 +2565,10 @@ class OpenMPIRBuilder {
   /// otherwise, with simd clause;
   ///
   /// \returns The insertion position *after* the ordered.
-  InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
-                                         BodyGenCallbackTy BodyGenCB,
-                                         FinalizeCallbackTy FiniCB,
-                                         bool IsThreads);
+  InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc,
+                                                BodyGenCallbackTy BodyGenCB,
+                                                FinalizeCallbackTy FiniCB,
+                                                bool IsThreads);
 
   /// Generator for '#omp sections'
   ///
@@ -2560,12 +2581,11 @@ class OpenMPIRBuilder {
   /// \param IsNowait If true, barrier - to ensure all sections are executed
   /// before moving forward will not be generated.
   /// \returns The insertion position *after* the sections.
-  InsertPointTy createSections(const LocationDescription &Loc,
-                               InsertPointTy AllocaIP,
-                               ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
-                               PrivatizeCallbackTy PrivCB,
-                               FinalizeCallbackTy FiniCB, bool IsCancellable,
-                               bool IsNowait);
+  InsertPointOrErrorTy
+  createSections(const LocationDescription &Loc, InsertPointTy AllocaIP,
+                 ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
+                 PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB,
+                 bool IsCancellable, bool IsNowait);
 
   /// Generator for '#omp section'
   ///
@@ -2573,9 +2593,9 @@ class OpenMPIRBuilder {
   /// \param BodyGenCB Callback that will generate the region body code.
   /// \param FiniCB Callback to finalize variable copies.
   /// \returns The insertion position *after* the section.
-  InsertPointTy createSection(const LocationDescription &Loc,
-                              BodyGenCallbackTy BodyGenCB,
-                              FinalizeCallbackTy FiniCB);
+  InsertPointOrErrorTy createSection(const LocationDescription &Loc,
+                                     BodyGenCallbackTy BodyGenCB,
+                                     FinalizeCallbackTy FiniCB);
 
   /// Generator for `#omp teams`
   ///
@@ -2589,7 +2609,7 @@ class OpenMPIRBuilder {
   ///        contention group created by each team.
   /// \param IfExpr is the integer argument value of the if condition on the
   ///        teams clause.
-  InsertPointTy
+  InsertPointOrErrorTy
   createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
               Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr,
               Value *ThreadLimit = nullptr, Value *IfExpr = nullptr);
@@ -2764,7 +2784,8 @@ class OpenMPIRBuilder {
 
 public:
   /// Functions used to generate a function with the given name.
-  using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
+  using FunctionGenCallback =
+      std::function<Expected<Function *>(StringRef FunctionName)>;
 
   /// Create a unique name for the entry function using the source location
   /// information of the current target region. The name will be something like:
@@ -2797,10 +2818,10 @@ class OpenMPIRBuilder {
   /// \param GenerateFunctionCallback The callback function to generate the code
   /// \param OutlinedFunction Pointer to the outlined function
   /// \param EntryFnIDName Name of the ID o be created
-  void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
-                                FunctionGenCallback &GenerateFunctionCallback,
-                                bool IsOffloadEntry, Function *&OutlinedFn,
-                                Constant *&OutlinedFnID);
+  Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
+                                 FunctionGenCallback &GenerateFunctionCallback,
+                                 bool IsOffloadEntry, Function *&OutlinedFn,
+                                 Constant *&OutlinedFnID);
 
   /// Registers the given function and sets up the attribtues of the function
   /// Returns the FunctionID.
@@ -2851,22 +2872,22 @@ class OpenMPIRBuilder {
   /// use_device_ptr and use_device_addr.
   /// \param CustomMapperCB Optional callback to generate code related to
   /// custom mappers.
-  OpenMPIRBuilder::InsertPointTy createTargetData(
+  InsertPointOrErrorTy createTargetData(
       const LocationDescription &Loc, InsertPointTy AllocaIP,
       InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
       TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
       omp::RuntimeFunction *MapperFunc = nullptr,
-      function_ref<InsertPointTy(InsertPointTy CodeGenIP,
-                                 BodyGenTy BodyGenType)>
+      function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
+                                        BodyGenTy BodyGenType)>
           BodyGenCB = nullptr,
       function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
       function_ref<Value *(unsigned int)> CustomMapperCB = nullptr,
       Value *SrcLocInfo = nullptr);
 
-  using TargetBodyGenCallbackTy = function_ref<InsertPointTy(
+  using TargetBodyGenCallbackTy = function_ref<InsertPointOrErrorTy(
       InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
 
-  using TargetGenArgAccessorsCallbackTy = function_ref<InsertPointTy(
+  using TargetGenArgAccessorsCallbackTy = function_ref<InsertPointOrErrorTy(
       Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
       InsertPointTy CodeGenIP)>;
 
@@ -2887,7 +2908,7 @@ class OpenMPIRBuilder {
   /// \param Dependencies A vector of DependData objects that carry
   // dependency information as passed in the depend clause
   // \param HasNowait Whether the target construct has a `nowait` clause or not.
-  InsertPointTy createTarget(
+  InsertPointOrErrorTy createTarget(
       const LocationDescription &Loc, bool IsOffloadEntry,
       OpenMPIRBuilder::InsertPointTy AllocaIP,
       OpenMPIRBuilder::InsertPointTy CodeGenIP,
@@ -2969,10 +2990,10 @@ class OpenMPIRBuilder {
   ///        should be called.
   ///
   /// \return The insertion position in exit block
-  InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
-                                        InsertPointTy FinIP,
-                                        Instruction *ExitCall,
-                                        bool HasFinalize = true);
+  InsertPointOrErrorTy emitCommonDirectiveExit(omp::Directive OMPD,
+                                               InsertPointTy FinIP,
+                                               Instruction *ExitCall,
+                                               bool HasFinalize = true);
 
   /// Common Interface to generate OMP inlined regions
   ///
@@ -2990,8 +3011,7 @@ class OpenMPIRBuilder {
   /// \param IsCancellable if HasFinalize is set to true, indicate if the
   ///        the directive should be cancellable.
   /// \return The insertion point after the region
-
-  InsertPointTy
+  InsertPointOrErrorTy
   EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
                        Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
                        FinalizeCallbackTy FiniCB, bool Conditional = false,
@@ -3027,7 +3047,7 @@ class OpenMPIRBuilder {
   ///
   /// \returns Value to update X to.
   using AtomicUpdateCallbackTy =
-      const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
+      const function_ref<Expected<Value *>(Value *XOld, IRBuilder<> &IRB)>;
 
 private:
   enum AtomicKind { Read, Write, Update, Capture, Compare };
@@ -3066,7 +3086,7 @@ class OpenMPIRBuilder {
   ///
   /// \returns A pair of the old value of X before the update, and the value
   ///          used for the update.
-  std::pair<Value *, Value *>
+  Expected<std::pair<Value *, Value *>>
   emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
                    AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                    AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
@@ -3143,12 +3163,11 @@ class OpenMPIRBuilder {
   ///	                    (e.g. true for X = X BinOp Expr)
   ///
   /// \return Insertion point after generated atomic update IR.
-  InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
-                                   InsertPointTy AllocaIP, AtomicOpValue &X,
-                                   Value *Expr, AtomicOrdering AO,
-                                   AtomicRMWInst::BinOp RMWOp,
-                                   AtomicUpdateCallbackTy &UpdateOp,
-                                   bool IsXBinopExpr);
+  InsertPointOrErrorTy
+  createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP,
+                     AtomicOpValue &X, Value *Expr, AtomicOrdering AO,
+                     AtomicRMWInst::BinOp RMWOp,
+                     AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr);
 
   /// Emit atomic update for constructs: --- Only Scalar data types
   /// V = X; X = X BinOp Expr ,
@@ -3179,7 +3198,7 @@ class OpenMPIRBuilder {
   ///                        'v', not an updated one.
   ///
   /// \return Insertion point after generated atomic capture IR.
-  InsertPointTy
+  InsertPointOrErrorTy
   createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP,
                       AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
                       AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index b6d36a5f7ae4fb3..49f4527bde66e7c 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -334,7 +334,7 @@ def SanitizeRealtime : EnumAttr<"sanitize_realtime", IntersectPreserve, [FnAttr]
 
 /// RealtimeSanitizer should error if a real-time unsafe function is invoked
 /// during a real-time sanitized function (see `sanitize_realtime`).
-def SanitizeRealtimeUnsafe : EnumAttr<"sanitize_realtime_unsafe", IntersectPreserve, [FnAttr]>;
+def SanitizeRealtimeBlocking : EnumAttr<"sanitize_realtime_blocking", IntersectPreserve, [FnAttr]>;
 
 /// Speculative Load Hardening is enabled.
 ///
@@ -430,7 +430,7 @@ def : CompatRule<"isEqual<SanitizeHWAddressAttr>">;
 def : CompatRule<"isEqual<SanitizeMemTagAttr>">;
 def : CompatRule<"isEqual<SanitizeNumericalStabilityAttr>">;
 def : CompatRule<"isEqual<SanitizeRealtimeAttr>">;
-def : CompatRule<"isEqual<SanitizeRealtimeUnsafeAttr>">;
+def : CompatRule<"isEqual<SanitizeRealtimeBlockingAttr>">;
 def : CompatRule<"isEqual<SafeStackAttr>">;
 def : CompatRule<"isEqual<ShadowCallStackAttr>">;
 def : CompatRule<"isEqual<UseSampleProfileAttr>">;
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 8f7ab2f9df389ef..93bd519f5727d80 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -78,7 +78,11 @@ class DataLayout {
     Align ABIAlign;
     Align PrefAlign;
     uint32_t IndexBitWidth;
-
+    /// Pointers in this address space don't have a well-defined bitwise
+    /// representation (e.g. may be relocated by a copying garbage collector).
+    /// Additionally, they may also be non-integral (i.e. containing additional
+    /// metadata such as bounds information/permissions).
+    bool IsNonIntegral;
     bool operator==(const PointerSpec &Other) const;
   };
 
@@ -133,10 +137,6 @@ class DataLayout {
   // The StructType -> StructLayout map.
   mutable void *LayoutMap = nullptr;
 
-  /// Pointers in these address spaces are non-integral, and don't have a
-  /// well-defined bitwise representation.
-  SmallVector<unsigned, 8> NonIntegralAddressSpaces;
-
   /// Sets or updates the specification for the given primitive type.
   void setPrimitiveSpec(char Specifier, uint32_t BitWidth, Align ABIAlign,
                         Align PrefAlign);
@@ -147,7 +147,8 @@ class DataLayout {
 
   /// Sets or updates the specification for pointer in the given address space.
   void setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign,
-                      Align PrefAlign, uint32_t IndexBitWidth);
+                      Align PrefAlign, uint32_t IndexBitWidth,
+                      bool IsNonIntegral);
 
   /// Internal helper to get alignment for integer of given bitwidth.
   Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const;
@@ -165,7 +166,8 @@ class DataLayout {
   Error parsePointerSpec(StringRef Spec);
 
   /// Attempts to parse a single specification.
-  Error parseSpecification(StringRef Spec);
+  Error parseSpecification(StringRef Spec,
+                           SmallVectorImpl<unsigned> &NonIntegralAddressSpaces);
 
   /// Attempts to parse a data layout string.
   Error parseLayoutString(StringRef LayoutString);
@@ -337,13 +339,17 @@ class DataLayout {
 
   /// Return the address spaces containing non-integral pointers.  Pointers in
   /// this address space don't have a well-defined bitwise representation.
-  ArrayRef<unsigned> getNonIntegralAddressSpaces() const {
-    return NonIntegralAddressSpaces;
+  SmallVector<unsigned, 8> getNonIntegralAddressSpaces() const {
+    SmallVector<unsigned, 8> Result;
+    // Collect the address space of every pointer spec flagged non-integral.
+    for (const PointerSpec &Spec : PointerSpecs)
+      if (Spec.IsNonIntegral)
+        Result.push_back(Spec.AddrSpace);
+    return Result;
   }
 
   bool isNonIntegralAddressSpace(unsigned AddrSpace) const {
-    ArrayRef<unsigned> NonIntegralSpaces = getNonIntegralAddressSpaces();
-    return is_contained(NonIntegralSpaces, AddrSpace);
+    return getPointerSpec(AddrSpace).IsNonIntegral;
   }
 
   bool isNonIntegralPointerType(PointerType *PT) const {
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index a24801d8bdf834f..820b5c0707df6cf 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -290,8 +290,8 @@ class StructType : public Type {
   bool isSized(SmallPtrSetImpl<Type *> *Visited = nullptr) const;
 
   /// Returns true if this struct contains a scalable vector.
-  bool
-  containsScalableVectorType(SmallPtrSetImpl<Type *> *Visited = nullptr) const;
+  bool isScalableTy(SmallPtrSetImpl<const Type *> &Visited) const;
+  using Type::isScalableTy;
 
   /// Returns true if this struct contains homogeneous scalable vector types.
   /// Note that the definition of homogeneous scalable vector type is not
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index e893295e3272b90..89dfff256e0c432 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -92,7 +92,7 @@ namespace Intrinsic {
   /// return the existing declaration.
   ///
   /// The \p Tys parameter is for intrinsics with overloaded types (e.g., those
-  /// using iAny, fAny, vAny, or iPTRAny).  For a declaration of an overloaded
+  /// using iAny, fAny, vAny, or pAny).  For a declaration of an overloaded
   /// intrinsic, Tys must provide exactly one type for each overloaded type in
   /// the intrinsic.
   Function *getOrInsertDeclaration(Module *M, ID id, ArrayRef<Type *> Tys = {});
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 94e53f372127da1..8ed57f818d60062 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -388,7 +388,7 @@ class LLVMAnyType<ValueType vt> : LLVMType<vt> {
     !eq(vt, iAny)    : ArgKind.AnyInteger,
     !eq(vt, fAny)    : ArgKind.AnyFloat,
     !eq(vt, vAny)    : ArgKind.AnyVector,
-    !eq(vt, iPTRAny) : ArgKind.AnyPointer,
+    !eq(vt, pAny)    : ArgKind.AnyPointer,
   );
   let Sig = [
     IIT_ARG.Number,
@@ -412,8 +412,8 @@ class LLVMQualPointerType<int addrspace>
     ]);
 }
 
-class LLVMAnyPointerType : LLVMAnyType<iPTRAny> {
-  assert isAny, "iPTRAny should have isOverloaded";
+class LLVMAnyPointerType : LLVMAnyType<pAny> {
+  assert isAny, "pAny should have isOverloaded";
 }
 
 // Match the type of another intrinsic parameter.  Number is an index into the
@@ -1050,6 +1050,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
   def int_nearbyint : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
   def int_round : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
   def int_roundeven    : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_sincos : DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
+                             [llvm_anyfloat_ty]>;
 
   // Truncate a floating point number with a specific rounding mode
   def int_fptrunc_round : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index e30d37f69f781ea..dada426368995d7 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -92,4 +92,6 @@ def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, L
 def int_dx_splitdouble : DefaultAttrsIntrinsic<[llvm_anyint_ty, LLVMMatchType<0>], 
     [LLVMScalarOrSameVectorWidth<0, llvm_double_ty>], [IntrNoMem]>;
 def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index b4a06f583f2c911..5164f873d00f482 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -42,6 +42,9 @@
 //   * llvm.nvvm.ptr.shared.to.gen   --> ibid.
 //   * llvm.nvvm.ptr.constant.to.gen --> ibid.
 //   * llvm.nvvm.ptr.local.to.gen    --> ibid.
+//   * llvm.nvvm.ldg.global.i        --> load addrspace(1) !load.invariant
+//   * llvm.nvvm.ldg.global.f        --> ibid.
+//   * llvm.nvvm.ldg.global.p        --> ibid.
 
 def llvm_global_ptr_ty  : LLVMQualPointerType<1>;  // (global)ptr
 def llvm_shared_ptr_ty  : LLVMQualPointerType<3>;  // (shared)ptr
@@ -1605,21 +1608,6 @@ def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
   [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
   "llvm.nvvm.ldu.global.p">;
 
-// Generated within nvvm. Use for ldg on sm_35 or later.  Second arg is the
-// pointer's alignment.
-def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
-  [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
-  "llvm.nvvm.ldg.global.i">;
-def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
-  [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
-  "llvm.nvvm.ldg.global.f">;
-def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
-  [llvm_anyptr_ty, llvm_i32_ty],
-  [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>],
-  "llvm.nvvm.ldg.global.p">;
-
 // Used in nvvm internally to help address space opt and ptx code generation
 // This is for params that are passed to kernel functions by pointer by-val.
 def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index 6df2eb156a07749..ddb47390537412a 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -87,6 +87,7 @@ let TargetPrefix = "spv" in {
   def int_spv_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>;
   def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
   def int_spv_radians : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>;
+  def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
 
   // Create resource handle given the binding information. Returns a 
   // type appropriate for the kind of resource given the set id, binding id,
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index d0083017fb93836..0ecca157077fdc1 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -6099,6 +6099,11 @@ let TargetPrefix = "x86" in {
         DefaultAttrsIntrinsic<[llvm_v8i32_ty],
         [llvm_v8i32_ty, llvm_v8i32_ty],
         [IntrNoMem]>;
+  def int_x86_vsm4key4512
+      : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
+        DefaultAttrsIntrinsic<[llvm_v16i32_ty],
+        [llvm_v16i32_ty, llvm_v16i32_ty],
+        [IntrNoMem]>;
   def int_x86_vsm4rnds4128
       : ClangBuiltin<"__builtin_ia32_vsm4rnds4128">,
         DefaultAttrsIntrinsic<[llvm_v4i32_ty],
@@ -6109,6 +6114,11 @@ let TargetPrefix = "x86" in {
         DefaultAttrsIntrinsic<[llvm_v8i32_ty],
         [llvm_v8i32_ty, llvm_v8i32_ty],
         [IntrNoMem]>;
+  def int_x86_vsm4rnds4512
+      : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">,
+        DefaultAttrsIntrinsic<[llvm_v16i32_ty],
+        [llvm_v16i32_ty, llvm_v16i32_ty],
+        [IntrNoMem]>;
 }
 //===----------------------------------------------------------------------===//
 // RAO-INT intrinsics
diff --git a/llvm/include/llvm/IR/Mangler.h b/llvm/include/llvm/IR/Mangler.h
index 349f9e6e7523399..3c3f0c6dce80fa8 100644
--- a/llvm/include/llvm/IR/Mangler.h
+++ b/llvm/include/llvm/IR/Mangler.h
@@ -61,6 +61,12 @@ std::optional<std::string> getArm64ECMangledFunctionName(StringRef Name);
 /// mangled.
 std::optional<std::string> getArm64ECDemangledFunctionName(StringRef Name);
 
+/// Check if an ARM64EC function name is mangled; empty names never are.
+inline bool isArm64ECMangledFunctionName(StringRef Name) {
+  return Name.starts_with('#') ||
+         (Name.starts_with('?') && Name.contains("$$h"));
+}
+
 } // End llvm namespace
 
 #endif
diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h
index 45ee372e7959d1a..4e65804179ae715 100644
--- a/llvm/include/llvm/IR/PassInstrumentation.h
+++ b/llvm/include/llvm/IR/PassInstrumentation.h
@@ -182,7 +182,7 @@ class PassInstrumentationCallbacks {
       BeforeNonSkippedPassCallbacks;
   /// These are run on passes that have just run.
   SmallVector<llvm::unique_function<AfterPassFunc>, 4> AfterPassCallbacks;
-  /// These are run passes that have just run on invalidated IR.
+  /// These are run on passes that have just run on invalidated IR.
   SmallVector<llvm::unique_function<AfterPassInvalidatedFunc>, 4>
       AfterPassInvalidatedCallbacks;
   /// These are run on analyses that are about to be run.
diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
index 57fb45db8491109..071575137ff572e 100644
--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@@ -14,6 +14,9 @@
 #ifndef LLVM_IR_STRUCTURALHASH_H
 #define LLVM_IR_STRUCTURALHASH_H
 
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/StableHashing.h"
+#include "llvm/IR/Instruction.h"
 #include <cstdint>
 
 namespace llvm {
@@ -21,20 +24,61 @@ namespace llvm {
 class Function;
 class Module;
 
-using IRHash = uint64_t;
-
 /// Returns a hash of the function \p F.
 /// \param F The function to hash.
 /// \param DetailedHash Whether or not to encode additional information in the
 /// hash. The additional information added into the hash when this flag is set
 /// to true includes instruction and operand type information.
-IRHash StructuralHash(const Function &F, bool DetailedHash = false);
+stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
 
 /// Returns a hash of the module \p M by hashing all functions and global
 /// variables contained within. \param M The module to hash. \param DetailedHash
 /// Whether or not to encode additional information in the function hashes that
 /// composed the module hash.
-IRHash StructuralHash(const Module &M, bool DetailedHash = false);
+stable_hash StructuralHash(const Module &M, bool DetailedHash = false);
+
+/// The pair of an instruction index and an operand index.
+using IndexPair = std::pair<unsigned, unsigned>;
+
+/// A map from an instruction index to an instruction pointer.
+using IndexInstrMap = MapVector<unsigned, Instruction *>;
+
+/// A map from an IndexPair to a stable hash.
+using IndexOperandHashMapType = DenseMap<IndexPair, stable_hash>;
+
+/// A function that takes an instruction and an operand index and returns true
+/// if the operand should be ignored in the function hash computation.
+using IgnoreOperandFunc = std::function<bool(const Instruction *, unsigned)>;
+
+struct FunctionHashInfo {
+  /// A hash value representing the structural content of the function.
+  stable_hash FunctionHash;
+  /// A mapping from instruction indices to instruction pointers.
+  std::unique_ptr<IndexInstrMap> IndexInstruction;
+  /// A mapping from pairs of instruction indices and operand indices
+  /// to the hashes of the operands. This can be used to analyze or
+  /// reconstruct the differences in ignored operands.
+  std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;
+
+  FunctionHashInfo(stable_hash FunctionHash,
+                   std::unique_ptr<IndexInstrMap> IndexInstruction,
+                   std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
+      : FunctionHash(FunctionHash),
+        IndexInstruction(std::move(IndexInstruction)),
+        IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
+};
+
+/// Computes a structural hash of a given function, considering the structure
+/// and content of the function's instructions while allowing for selective
+/// ignoring of certain operands based on custom criteria. This hash can be used
+/// to identify functions that are structurally similar or identical, which is
+/// useful in optimizations, deduplication, or analysis tasks.
+/// \param F The function to hash.
+/// \param IgnoreOp A callable that takes an instruction and an operand index,
+/// and returns true if the operand should be ignored in the hash computation.
+/// \return A FunctionHashInfo structure
+FunctionHashInfo StructuralHashWithDifferences(const Function &F,
+                                               IgnoreOperandFunc IgnoreOp);
 
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 2f53197df199986..d563b25d600a0c3 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -206,6 +206,7 @@ class Type {
   bool isScalableTargetExtTy() const;
 
   /// Return true if this is a type whose size is a known multiple of vscale.
+  bool isScalableTy(SmallPtrSetImpl<const Type *> &Visited) const;
   bool isScalableTy() const;
 
   /// Return true if this is a FP type or a vector of FP.
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 26f5d63553c5a87..54c070401ec8a40 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -101,7 +101,7 @@ void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry &);
 void initializeEarlyIfConverterLegacyPass(PassRegistry &);
 void initializeEarlyIfPredicatorPass(PassRegistry &);
 void initializeEarlyMachineLICMPass(PassRegistry &);
-void initializeEarlyTailDuplicatePass(PassRegistry &);
+void initializeEarlyTailDuplicateLegacyPass(PassRegistry &);
 void initializeEdgeBundlesPass(PassRegistry &);
 void initializeEHContGuardCatchretPass(PassRegistry &);
 void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry &);
@@ -300,7 +300,7 @@ void initializeStraightLineStrengthReduceLegacyPassPass(PassRegistry &);
 void initializeStripDebugMachineModulePass(PassRegistry &);
 void initializeStructurizeCFGLegacyPassPass(PassRegistry &);
 void initializeTailCallElimPass(PassRegistry &);
-void initializeTailDuplicatePass(PassRegistry &);
+void initializeTailDuplicateLegacyPass(PassRegistry &);
 void initializeTargetLibraryInfoWrapperPassPass(PassRegistry &);
 void initializeTargetPassConfigPass(PassRegistry &);
 void initializeTargetTransformInfoWrapperPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h
index bea79545d1ab96e..1392336968e74ab 100644
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@@ -515,6 +515,7 @@ class MCCFIInstruction {
     OpRegister,
     OpWindowSave,
     OpNegateRAState,
+    OpNegateRAStateWithPC,
     OpGnuArgsSize,
     OpLabel,
   };
@@ -642,6 +643,12 @@ class MCCFIInstruction {
     return MCCFIInstruction(OpNegateRAState, L, 0, INT64_C(0), Loc);
   }
 
+  /// .cfi_negate_ra_state_with_pc AArch64 negate RA state with PC.
+  static MCCFIInstruction createNegateRAStateWithPC(MCSymbol *L,
+                                                    SMLoc Loc = {}) {
+    return MCCFIInstruction(OpNegateRAStateWithPC, L, 0, INT64_C(0), Loc);
+  }
+
   /// .cfi_restore says that the rule for Register is now the same as it
   /// was at the beginning of the function, after all initial instructions added
   /// by .cfi_startproc were executed.
diff --git a/llvm/include/llvm/MC/MCInstPrinter.h b/llvm/include/llvm/MC/MCInstPrinter.h
index 60a901e3d0deae6..e825c04a6dba6f9 100644
--- a/llvm/include/llvm/MC/MCInstPrinter.h
+++ b/llvm/include/llvm/MC/MCInstPrinter.h
@@ -9,8 +9,10 @@
 #ifndef LLVM_MC_MCINSTPRINTER_H
 #define LLVM_MC_MCINSTPRINTER_H
 
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cstdint>
 
 namespace llvm {
@@ -24,7 +26,6 @@ class MCRegister;
 class MCRegisterInfo;
 class MCSubtargetInfo;
 class StringRef;
-class raw_ostream;
 
 /// Convert `Bytes' to a hex string and output to `OS'
 void dumpBytes(ArrayRef<uint8_t> Bytes, raw_ostream &OS);
@@ -76,6 +77,8 @@ class MCInstPrinter {
   /// If true, symbolize branch target and memory reference operands.
   bool SymbolizeOperands = false;
 
+  SmallVector<raw_ostream::Colors, 4> ColorStack{raw_ostream::Colors::RESET};
+
   /// Utility function for printing annotations.
   void printAnnotation(raw_ostream &OS, StringRef Annot);
 
@@ -98,8 +101,8 @@ class MCInstPrinter {
 
   class WithMarkup {
   public:
-    LLVM_CTOR_NODISCARD WithMarkup(raw_ostream &OS, Markup M, bool EnableMarkup,
-                                   bool EnableColor);
+    LLVM_CTOR_NODISCARD WithMarkup(MCInstPrinter &IP, raw_ostream &OS, Markup M,
+                                   bool EnableMarkup, bool EnableColor);
     ~WithMarkup();
 
     template <typename T> WithMarkup &operator<<(T &O) {
@@ -113,6 +116,7 @@ class MCInstPrinter {
     }
 
   private:
+    MCInstPrinter &IP;
     raw_ostream &OS;
     bool EnableMarkup;
     bool EnableColor;
@@ -144,7 +148,7 @@ class MCInstPrinter {
   StringRef getOpcodeName(unsigned Opcode) const;
 
   /// Print the assembler register name.
-  virtual void printRegName(raw_ostream &OS, MCRegister Reg) const;
+  virtual void printRegName(raw_ostream &OS, MCRegister Reg);
 
   bool getUseMarkup() const { return UseMarkup; }
   void setUseMarkup(bool Value) { UseMarkup = Value; }
@@ -152,7 +156,7 @@ class MCInstPrinter {
   bool getUseColor() const { return UseColor; }
   void setUseColor(bool Value) { UseColor = Value; }
 
-  WithMarkup markup(raw_ostream &OS, Markup M) const;
+  WithMarkup markup(raw_ostream &OS, Markup M);
 
   bool getPrintImmHex() const { return PrintImmHex; }
   void setPrintImmHex(bool Value) { PrintImmHex = Value; }
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index faa72d5f3144c43..70fba69778536e4 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -206,7 +206,7 @@ class MCAsmParser {
       SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
       SmallVectorImpl<std::string> &Constraints,
       SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
-      const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) = 0;
+      MCInstPrinter *IP, MCAsmParserSemaCallback &SI) = 0;
 
   /// Emit a note at the location \p L, with the message \p Msg.
   virtual void Note(SMLoc L, const Twine &Msg,
diff --git a/llvm/include/llvm/MC/MCSectionELF.h b/llvm/include/llvm/MC/MCSectionELF.h
index d43ffbd885c961e..f09d30591a3cf60 100644
--- a/llvm/include/llvm/MC/MCSectionELF.h
+++ b/llvm/include/llvm/MC/MCSectionELF.h
@@ -62,6 +62,8 @@ class MCSectionELF final : public MCSection {
                   type == ELF::SHT_NOBITS, Begin),
         Type(type), Flags(flags), UniqueID(UniqueID), EntrySize(entrySize),
         Group(group, IsComdat), LinkedToSym(LinkedToSym) {
+    assert((!(Flags & ELF::SHF_GROUP) || Group.getPointer()) &&
+           "Group section without signature!");
     if (Group.getPointer())
       Group.getPointer()->setIsSignature();
   }
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index 707aecc5dc578e0..a376ba810ba5152 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -1022,6 +1022,7 @@ class MCStreamer {
                                SMLoc Loc = {});
   virtual void emitCFIWindowSave(SMLoc Loc = {});
   virtual void emitCFINegateRAState(SMLoc Loc = {});
+  virtual void emitCFINegateRAStateWithPC(SMLoc Loc = {});
   virtual void emitCFILabelDirective(SMLoc Loc, StringRef Name);
 
   virtual void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc());
diff --git a/llvm/include/llvm/MC/MCXCOFFObjectWriter.h b/llvm/include/llvm/MC/MCXCOFFObjectWriter.h
index c0e32a70172d882..968d938a6549815 100644
--- a/llvm/include/llvm/MC/MCXCOFFObjectWriter.h
+++ b/llvm/include/llvm/MC/MCXCOFFObjectWriter.h
@@ -39,19 +39,18 @@ class MCXCOFFObjectTargetWriter : public MCObjectTargetWriter {
   bool Is64Bit;
 };
 
+class XCOFFObjectWriter : public MCObjectWriter {
+public:
+  virtual void addExceptionEntry(const MCSymbol *Symbol, const MCSymbol *Trap,
+                                 unsigned LanguageCode, unsigned ReasonCode,
+                                 unsigned FunctionSize, bool hasDebug) = 0;
+  virtual void addCInfoSymEntry(StringRef Name, StringRef Metadata) = 0;
+};
+
 std::unique_ptr<MCObjectWriter>
 createXCOFFObjectWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
                         raw_pwrite_stream &OS);
 
-namespace XCOFF {
-void addExceptionEntry(MCObjectWriter &Writer, const MCSymbol *Symbol,
-                       const MCSymbol *Trap, unsigned LanguageCode,
-                       unsigned ReasonCode, unsigned FunctionSize,
-                       bool hasDebug);
-void addCInfoSymEntry(MCObjectWriter &Writer, StringRef Name,
-                      StringRef Metadata);
-} // namespace XCOFF
-
 } // end namespace llvm
 
 #endif // LLVM_MC_MCXCOFFOBJECTWRITER_H
diff --git a/llvm/include/llvm/MC/MCXCOFFStreamer.h b/llvm/include/llvm/MC/MCXCOFFStreamer.h
index 8cae64fa33be050..3c7abfea85ecf5a 100644
--- a/llvm/include/llvm/MC/MCXCOFFStreamer.h
+++ b/llvm/include/llvm/MC/MCXCOFFStreamer.h
@@ -12,6 +12,7 @@
 #include "llvm/MC/MCObjectStreamer.h"
 
 namespace llvm {
+class XCOFFObjectWriter;
 
 class MCXCOFFStreamer : public MCObjectStreamer {
 public:
@@ -19,6 +20,8 @@ class MCXCOFFStreamer : public MCObjectStreamer {
                   std::unique_ptr<MCObjectWriter> OW,
                   std::unique_ptr<MCCodeEmitter> Emitter);
 
+  XCOFFObjectWriter &getWriter();
+
   bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
   void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                         Align ByteAlignment) override;
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index ad80c661147d6f5..9e95625fd1d881e 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -60,6 +60,7 @@
 #include "llvm/CodeGen/SjLjEHPrepare.h"
 #include "llvm/CodeGen/StackColoring.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TailDuplication.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TwoAddressInstructionPass.h"
 #include "llvm/CodeGen/UnreachableBlockElim.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 4f32a917738c134..9d12a120ff7ac6d 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -133,6 +133,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", VirtRegMapAnalysis())
 MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass())
 MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass())
 MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
+MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass())
 MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass())
 MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass())
 MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass())
@@ -157,6 +158,7 @@ MACHINE_FUNCTION_PASS("print<virtregmap>", VirtRegMapPrinterPass(dbgs()))
 MACHINE_FUNCTION_PASS("require-all-machine-function-properties",
                       RequireAllMachineFunctionPropertiesPass())
 MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass())
+MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass())
 MACHINE_FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
 MACHINE_FUNCTION_PASS("two-address-instruction", TwoAddressInstructionPass())
 MACHINE_FUNCTION_PASS("verify", MachineVerifierPass())
@@ -210,7 +212,6 @@ DUMMY_MACHINE_FUNCTION_PASS("cfi-fixup", CFIFixupPass)
 DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass)
 DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass)
 DUMMY_MACHINE_FUNCTION_PASS("dot-machine-cfg", MachineCFGPrinter)
-DUMMY_MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass)
 DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass)
 DUMMY_MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass)
 DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass)
@@ -262,7 +263,6 @@ DUMMY_MACHINE_FUNCTION_PASS("simple-register-coalescing", RegisterCoalescerPass)
 DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", StackFrameLayoutAnalysisPass)
 DUMMY_MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass)
 DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass)
-DUMMY_MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass)
 DUMMY_MACHINE_FUNCTION_PASS("unpack-mi-bundles", UnpackMachineBundlesPass)
 DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass)
 DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass)
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index b8b6c684717b058..559549b0a22cc97 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -78,12 +78,20 @@ class InstrProfWriter {
   // Whether to serialize the full schema.
   bool MemProfFullSchema;
 
+  // Whether to generate random memprof hotness for testing.
+  bool MemprofGenerateRandomHotness;
+
 public:
+  // For memprof testing, random hotness can be assigned to the contexts if
+  // MemprofGenerateRandomHotness is enabled. The random seed can be either
+  // provided by MemprofGenerateRandomHotnessSeed, or if that is 0, one will be
+  // generated in the writer using the current time.
   InstrProfWriter(
       bool Sparse = false, uint64_t TemporalProfTraceReservoirSize = 0,
       uint64_t MaxTemporalProfTraceLength = 0, bool WritePrevVersion = false,
       memprof::IndexedVersion MemProfVersionRequested = memprof::Version0,
-      bool MemProfFullSchema = false);
+      bool MemProfFullSchema = false, bool MemprofGenerateRandomHotness = false,
+      unsigned MemprofGenerateRandomHotnessSeed = 0);
   ~InstrProfWriter();
 
   StringMap<ProfilingData> &getProfileData() { return FunctionData; }
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index f8121d35732518b..da2cc807370095d 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -147,6 +147,15 @@ struct PortableMemInfoBlock {
     return Name;                                                               \
   }
 #include "llvm/ProfileData/MIBEntryDef.inc"
+#undef MIBEntryDef
+
+  // Define setters for each type which can be called by the writer.
+#define MIBEntryDef(NameTag, Name, Type)                                       \
+  void set##Name(Type NewVal) {                                                \
+    assert(Schema[llvm::to_underlying(Meta::Name)]);                           \
+    Name = NewVal;                                                             \
+  }
+#include "llvm/ProfileData/MIBEntryDef.inc"
 #undef MIBEntryDef
 
   void clear() { *this = PortableMemInfoBlock(); }
diff --git a/llvm/include/llvm/Remarks/HotnessThresholdParser.h b/llvm/include/llvm/Remarks/HotnessThresholdParser.h
index 4cd0d2dff2fe640..374992de59878a5 100644
--- a/llvm/include/llvm/Remarks/HotnessThresholdParser.h
+++ b/llvm/include/llvm/Remarks/HotnessThresholdParser.h
@@ -16,6 +16,7 @@
 #define LLVM_REMARKS_HOTNESSTHRESHOLDPARSER_H
 
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
 #include <optional>
 
 namespace llvm {
diff --git a/llvm/include/llvm/SandboxIR/Context.h b/llvm/include/llvm/SandboxIR/Context.h
index 1285598a1c02822..f2056de87cb946c 100644
--- a/llvm/include/llvm/SandboxIR/Context.h
+++ b/llvm/include/llvm/SandboxIR/Context.h
@@ -9,18 +9,39 @@
 #ifndef LLVM_SANDBOXIR_CONTEXT_H
 #define LLVM_SANDBOXIR_CONTEXT_H
 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/SandboxIR/Tracker.h"
 #include "llvm/SandboxIR/Type.h"
 
+#include <cstdint>
+
 namespace llvm::sandboxir {
 
-class Module;
-class Value;
 class Argument;
+class BBIterator;
 class Constant;
+class Module;
+class Value;
 
 class Context {
+public:
+  // An EraseInstrCallback receives the instruction about to be erased.
+  using EraseInstrCallback = std::function<void(Instruction *)>;
+  // A CreateInstrCallback receives the instruction that was just created.
+  using CreateInstrCallback = std::function<void(Instruction *)>;
+  // A MoveInstrCallback receives the instruction about to be moved and an
+  // iterator pointing to the insertion position in the destination BB.
+  using MoveInstrCallback =
+      std::function<void(Instruction *, const BBIterator &)>;
+
+  /// An ID for a registered callback. Used for deregistration. Using a 64-bit
+  /// integer so we don't have to worry about the unlikely case of overflowing
+  /// a 32-bit counter.
+  using CallbackID = uint64_t;
+
 protected:
   LLVMContext &LLVMCtx;
   friend class Type;        // For LLVMCtx.
@@ -48,6 +69,21 @@ class Context {
   /// Type objects.
   DenseMap<llvm::Type *, std::unique_ptr<Type, TypeDeleter>> LLVMTypeToTypeMap;
 
+  /// Callbacks called when an IR instruction is about to get erased. Keys are
+  /// used as IDs for deregistration.
+  MapVector<CallbackID, EraseInstrCallback> EraseInstrCallbacks;
+  /// Callbacks called right after an IR instruction has been created. Keys are
+  /// used as IDs for deregistration.
+  MapVector<CallbackID, CreateInstrCallback> CreateInstrCallbacks;
+  /// Callbacks called when an IR instruction is about to get moved. Keys are
+  /// used as IDs for deregistration.
+  MapVector<CallbackID, MoveInstrCallback> MoveInstrCallbacks;
+
+  /// A counter used for assigning callback IDs during registration. The same
+  /// counter is used for all kinds of callbacks so we can detect mismatched
+  /// registration/deregistration.
+  CallbackID NextCallbackID = 0;
+
   /// Remove \p V from the maps and returns the unique_ptr.
   std::unique_ptr<Value> detachLLVMValue(llvm::Value *V);
   /// Remove \p SBV from all SandboxIR maps and stop owning it. This effectively
@@ -70,6 +106,10 @@ class Context {
   Constant *getOrCreateConstant(llvm::Constant *LLVMC);
   friend class Utils; // For getMemoryBase
 
+  void runEraseInstrCallbacks(Instruction *I);
+  void runCreateInstrCallbacks(Instruction *I);
+  void runMoveInstrCallbacks(Instruction *I, const BBIterator &Where);
+
   // Friends for getOrCreateConstant().
 #define DEF_CONST(ID, CLASS) friend class CLASS;
 #include "llvm/SandboxIR/Values.def"
@@ -198,6 +238,28 @@ class Context {
 
   /// \Returns the number of values registered with Context.
   size_t getNumValues() const { return LLVMValueToValueMap.size(); }
+
+  /// Register a callback that gets called when a SandboxIR instruction is about
+  /// to be removed from its parent. Note that this will also be called when
+  /// reverting the creation of an instruction.
+  /// \Returns a callback ID for later deregistration.
+  CallbackID registerEraseInstrCallback(EraseInstrCallback CB);
+  void unregisterEraseInstrCallback(CallbackID ID);
+
+  /// Register a callback that gets called right after a SandboxIR instruction
+  /// is created. Note that this will also be called when reverting the removal
+  /// of an instruction.
+  /// \Returns a callback ID for later deregistration.
+  CallbackID registerCreateInstrCallback(CreateInstrCallback CB);
+  void unregisterCreateInstrCallback(CallbackID ID);
+
+  /// Register a callback that gets called when a SandboxIR instruction is about
+  /// to be moved. Note that this will also be called when reverting a move.
+  /// \Returns a callback ID for later deregistration.
+  CallbackID registerMoveInstrCallback(MoveInstrCallback CB);
+  void unregisterMoveInstrCallback(CallbackID ID);
+
+  // TODO: Add callbacks for instructions inserted/removed if needed.
 };
 
 } // namespace llvm::sandboxir
diff --git a/llvm/include/llvm/SandboxIR/Pass.h b/llvm/include/llvm/SandboxIR/Pass.h
index 5ed9d7442ee70cd..fee6bd9e779fda6 100644
--- a/llvm/include/llvm/SandboxIR/Pass.h
+++ b/llvm/include/llvm/SandboxIR/Pass.h
@@ -12,11 +12,29 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 
-namespace llvm::sandboxir {
+namespace llvm {
+
+class ScalarEvolution;
+
+namespace sandboxir {
 
 class Function;
 class Region;
 
+/// Analyses made available to Sandbox IR passes (currently ScalarEvolution).
+class Analyses {
+  ScalarEvolution *SE = nullptr;
+
+  Analyses() = default;
+
+public:
+  Analyses(ScalarEvolution &SE) : SE(&SE) {}
+  /// Precondition: not built by emptyForTesting(), which leaves SE null.
+  ScalarEvolution &getScalarEvolution() const { return *SE; }
+  /// For use by unit tests.
+  static Analyses emptyForTesting() { return Analyses(); }
+};
+
 /// The base class of a Sandbox IR Pass.
 class Pass {
 protected:
@@ -52,7 +70,7 @@ class FunctionPass : public Pass {
   /// \p Name can't contain any spaces or start with '-'.
   FunctionPass(StringRef Name) : Pass(Name) {}
   /// \Returns true if it modifies \p F.
-  virtual bool runOnFunction(Function &F) = 0;
+  virtual bool runOnFunction(Function &F, const Analyses &A) = 0;
 };
 
 /// A pass that runs on a sandbox::Region.
@@ -61,9 +79,10 @@ class RegionPass : public Pass {
   /// \p Name can't contain any spaces or start with '-'.
   RegionPass(StringRef Name) : Pass(Name) {}
   /// \Returns true if it modifies \p R.
-  virtual bool runOnRegion(Region &R) = 0;
+  virtual bool runOnRegion(Region &R, const Analyses &A) = 0;
 };
 
-} // namespace llvm::sandboxir
+} // namespace sandboxir
+} // namespace llvm
 
 #endif // LLVM_SANDBOXIR_PASS_H
diff --git a/llvm/include/llvm/SandboxIR/PassManager.h b/llvm/include/llvm/SandboxIR/PassManager.h
index e8221996bc8f049..77154cc71434546 100644
--- a/llvm/include/llvm/SandboxIR/PassManager.h
+++ b/llvm/include/llvm/SandboxIR/PassManager.h
@@ -208,7 +208,7 @@ class FunctionPassManager final
   FunctionPassManager(StringRef Name, StringRef Pipeline,
                       CreatePassFunc CreatePass)
       : PassManager(Name, Pipeline, CreatePass) {}
-  bool runOnFunction(Function &F) final;
+  bool runOnFunction(Function &F, const Analyses &A) final;
 };
 
 class RegionPassManager final : public PassManager<RegionPass, RegionPass> {
@@ -217,7 +217,7 @@ class RegionPassManager final : public PassManager<RegionPass, RegionPass> {
   RegionPassManager(StringRef Name, StringRef Pipeline,
                     CreatePassFunc CreatePass)
       : PassManager(Name, Pipeline, CreatePass) {}
-  bool runOnRegion(Region &R) final;
+  bool runOnRegion(Region &R, const Analyses &A) final;
 };
 
 } // namespace llvm::sandboxir
diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h
index 591e7647795bb21..f9c57b89f1f033a 100644
--- a/llvm/include/llvm/Support/Compiler.h
+++ b/llvm/include/llvm/Support/Compiler.h
@@ -413,6 +413,12 @@
 #define LLVM_GSL_POINTER
 #endif
 
+#if LLVM_HAS_CPP_ATTRIBUTE(clang::lifetimebound)
+#define LLVM_LIFETIME_BOUND [[clang::lifetimebound]]
+#else
+#define LLVM_LIFETIME_BOUND
+#endif
+
 #if LLVM_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L
 #define LLVM_CTOR_NODISCARD [[nodiscard]]
 #else
diff --git a/llvm/include/llvm/Support/Endian.h b/llvm/include/llvm/Support/Endian.h
index 5831fe66a1f7b71..f86ea8901ae46b8 100644
--- a/llvm/include/llvm/Support/Endian.h
+++ b/llvm/include/llvm/Support/Endian.h
@@ -58,7 +58,7 @@ template <typename value_type, std::size_t alignment = unaligned>
 [[nodiscard]] inline value_type read(const void *memory, endianness endian) {
   value_type ret;
 
-  memcpy(&ret,
+  memcpy(static_cast<void *>(&ret),
          LLVM_ASSUME_ALIGNED(
              memory, (detail::PickAlignment<value_type, alignment>::value)),
          sizeof(value_type));
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 3556a253d875fe6..0c4c6ccd5c568e3 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -809,6 +809,9 @@ HANDLE_TARGET_OPCODE(G_FCOS)
 /// Floating point sine.
 HANDLE_TARGET_OPCODE(G_FSIN)
 
+/// Floating point combined sine and cosine.
+HANDLE_TARGET_OPCODE(G_FSINCOS)
+
 /// Floating point tangent.
 HANDLE_TARGET_OPCODE(G_FTAN)
 
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 8b8bc9a0e9cf565..62bb9789afe5d26 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1020,6 +1020,13 @@ def G_FSIN : GenericInstruction {
   let hasSideEffects = false;
 }
 
+// Floating point combined sine and cosine.
+def G_FSINCOS : GenericInstruction {
+  let OutOperandList = (outs type0:$dst1, type0:$dst2);
+  let InOperandList = (ins type0:$src1);
+  let hasSideEffects = false;
+}
+
 // Floating point tangent of a value.
 def G_FTAN : GenericInstruction {
   let OutOperandList = (outs type0:$dst);
diff --git a/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc b/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc
index 902fa8f79ab8164..e454524c9cb6a23 100644
--- a/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc
+++ b/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc
@@ -53,9 +53,9 @@ enum CPUFeatures {
   FEAT_EBF16,
   FEAT_RPRES,
   FEAT_SVE,
-  FEAT_SVE_BF16,
-  FEAT_SVE_EBF16,
-  FEAT_SVE_I8MM,
+  RESERVED_FEAT_SVE_BF16,  // previously used and now ABI legacy
+  RESERVED_FEAT_SVE_EBF16, // previously used and now ABI legacy
+  RESERVED_FEAT_SVE_I8MM,  // previously used and now ABI legacy
   FEAT_SVE_F32MM,
   FEAT_SVE_F64MM,
   FEAT_SVE2,
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 5920dde9d77dfd9..e82155a6c72974f 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -64,6 +64,13 @@
 // - Perhaps a post-inlining function specialization pass could be more
 //   aggressive on literal constants.
 //
+// Limitations:
+// ------------
+// - We are unable to consider specializations of functions called from indirect
+//   callsites whose pointer operand has a lattice value that is known to be
+//   constant, either from IPSCCP or previous iterations of FuncSpec. This is
+//   because SCCP has not yet replaced the uses of the known constant.
+//
 // References:
 // -----------
 // 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
@@ -131,13 +138,16 @@ struct Spec {
   // Profitability of the specialization.
   unsigned Score;
 
+  // Number of instructions in the specialization.
+  unsigned CodeSize;
+
   // List of call sites, matching this specialization.
   SmallVector<CallBase *> CallSites;
 
-  Spec(Function *F, const SpecSig &S, unsigned Score)
-      : F(F), Sig(S), Score(Score) {}
-  Spec(Function *F, const SpecSig &&S, unsigned Score)
-      : F(F), Sig(S), Score(Score) {}
+  Spec(Function *F, const SpecSig &S, unsigned Score, unsigned CodeSize)
+      : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
+  Spec(Function *F, const SpecSig &&S, unsigned Score, unsigned CodeSize)
+      : F(F), Sig(S), Score(Score), CodeSize(CodeSize) {}
 };
 
 class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
index bcfafd75d4caaf5..77ba5cd7f002e91 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h
@@ -28,8 +28,11 @@ enum class LegalityResultID {
 
 /// The reason for vectorizing or not vectorizing.
 enum class ResultReason {
+  NotInstructions,
   DiffOpcodes,
   DiffTypes,
+  DiffMathFlags,
+  DiffWrapFlags,
 };
 
 #ifndef NDEBUG
@@ -46,10 +49,16 @@ struct ToStr {
 
   static const char *getVecReason(ResultReason Reason) {
     switch (Reason) {
+    case ResultReason::NotInstructions:
+      return "NotInstructions";
     case ResultReason::DiffOpcodes:
       return "DiffOpcodes";
     case ResultReason::DiffTypes:
       return "DiffTypes";
+    case ResultReason::DiffMathFlags:
+      return "DiffMathFlags";
+    case ResultReason::DiffWrapFlags:
+      return "DiffWrapFlags";
     }
     llvm_unreachable("Unknown ResultReason enum");
   }
@@ -67,6 +76,10 @@ class LegalityResult {
   LegalityResult(LegalityResultID ID) : ID(ID) {}
   friend class LegalityAnalysis;
 
+  /// We shouldn't need copies.
+  LegalityResult(const LegalityResult &) = delete;
+  LegalityResult &operator=(const LegalityResult &) = delete;
+
 public:
   virtual ~LegalityResult() {}
   LegalityResultID getSubclassID() const { return ID; }
@@ -90,6 +103,7 @@ class LegalityResultWithReason : public LegalityResult {
   friend class Pack; // For constructor.
 
 public:
+  ResultReason getReason() const { return Reason; }
 #ifndef NDEBUG
   void print(raw_ostream &OS) const override {
     LegalityResult::print(OS);
@@ -138,7 +152,7 @@ class LegalityAnalysis {
   }
   /// Checks if it's legal to vectorize the instructions in \p Bndl.
   /// \Returns a LegalityResult object owned by LegalityAnalysis.
-  LegalityResult &canVectorize(ArrayRef<Value *> Bndl);
+  const LegalityResult &canVectorize(ArrayRef<Value *> Bndl);
 };
 
 } // namespace llvm::sandboxir
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
index 5cd47efd6b34620..2b0b3f8192c0482 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h
@@ -33,7 +33,7 @@ class BottomUpVec final : public FunctionPass {
 
 public:
   BottomUpVec(StringRef Pipeline);
-  bool runOnFunction(Function &F) final;
+  bool runOnFunction(Function &F, const Analyses &A) final;
   void printPipeline(raw_ostream &OS) const final {
     OS << getName() << "\n";
     RPM.printPipeline(OS);
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h
index 75b9f42520156ce..1025379770bac07 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/NullPass.h
@@ -11,7 +11,7 @@ class Region;
 class NullPass final : public RegionPass {
 public:
   NullPass() : RegionPass("null") {}
-  bool runOnRegion(Region &R) final { return false; }
+  bool runOnRegion(Region &R, const Analyses &A) final { return false; }
 };
 
 } // namespace llvm::sandboxir
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintInstructionCount.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintInstructionCount.h
index 9d88bc828038479..cd11d4c1489268b 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintInstructionCount.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/PrintInstructionCount.h
@@ -12,7 +12,7 @@ namespace llvm::sandboxir {
 class PrintInstructionCount final : public RegionPass {
 public:
   PrintInstructionCount() : RegionPass("null") {}
-  bool runOnRegion(Region &R) final {
+  bool runOnRegion(Region &R, const Analyses &A) final {
     outs() << "InstructionCount: " << std::distance(R.begin(), R.end()) << "\n";
     return false;
   }
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.h
index 3d82a61c90153aa..3d738ac8917effd 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.h
@@ -26,7 +26,7 @@ class RegionsFromMetadata final : public FunctionPass {
 
 public:
   RegionsFromMetadata(StringRef Pipeline);
-  bool runOnFunction(Function &F) final;
+  bool runOnFunction(Function &F, const Analyses &A) final;
   void printPipeline(raw_ostream &OS) const final {
     OS << getName() << "\n";
     RPM.printPipeline(OS);
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h
index b83744cf9e6cb68..03867df3d980845 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.h
@@ -10,6 +10,7 @@
 
 #include <memory>
 
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/SandboxIR/PassManager.h"
 
@@ -19,6 +20,7 @@ class TargetTransformInfo;
 
 class SandboxVectorizerPass : public PassInfoMixin<SandboxVectorizerPass> {
   TargetTransformInfo *TTI = nullptr;
+  ScalarEvolution *SE = nullptr;
 
   // A pipeline of SandboxIR function passes run by the vectorizer.
   sandboxir::FunctionPassManager FPM;
diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
index 64f57edb38484ef..9577e8ef7b37cb9 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h
@@ -12,7 +12,11 @@
 #ifndef LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H
 #define LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H
 
-class Utils {
+#include "llvm/SandboxIR/Type.h"
+
+namespace llvm::sandboxir {
+
+class VecUtils {
 public:
   /// \Returns the number of elements in \p Ty. That is the number of lanes if a
   /// fixed vector or 1 if scalar. ScalableVectors have unknown size and
@@ -25,6 +29,8 @@ class Utils {
   static Type *getElementType(Type *Ty) {
     return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getElementType() : Ty;
   }
-}
+};
+
+} // namespace llvm::sandboxir
 
-#endif LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H
+#endif // LLVM_TRANSFORMS_VECTORIZE_SANDBOXVECTORIZER_VECUTILS_H
diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
index 27360d0e84cb2b8..5d81658409dae85 100644
--- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -528,7 +528,7 @@ void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
         // dso_preemptable aliases or aliases with interposable linkage.
         const GlobalValue *Callee =
             dyn_cast<GlobalValue>(CB.getCalledOperand()->stripPointerCasts());
-        if (!Callee) {
+        if (!Callee || isa<GlobalIFunc>(Callee)) {
           US.addRange(I, UnknownRange, /*IsSafe=*/false);
           break;
         }
diff --git a/llvm/lib/Analysis/StructuralHash.cpp b/llvm/lib/Analysis/StructuralHash.cpp
index 3a2341fe59ad9ce..4f2e003148b6065 100644
--- a/llvm/lib/Analysis/StructuralHash.cpp
+++ b/llvm/lib/Analysis/StructuralHash.cpp
@@ -21,14 +21,33 @@ using namespace llvm;
 PreservedAnalyses StructuralHashPrinterPass::run(Module &M,
                                                  ModuleAnalysisManager &MAM) {
   OS << "Module Hash: "
-     << format("%016" PRIx64, StructuralHash(M, EnableDetailedStructuralHash))
+     << format("%016" PRIx64,
+               StructuralHash(M, Options != StructuralHashOptions::None))
      << "\n";
   for (Function &F : M) {
     if (F.isDeclaration())
       continue;
-    OS << "Function " << F.getName() << " Hash: "
-       << format("%016" PRIx64, StructuralHash(F, EnableDetailedStructuralHash))
-       << "\n";
+    if (Options == StructuralHashOptions::CallTargetIgnored) {
+      auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) {
+        return I->getOpcode() == Instruction::Call &&
+               isa<Constant>(I->getOperand(OpndIdx));
+      };
+      auto FuncHashInfo = StructuralHashWithDifferences(F, IgnoreOp);
+      OS << "Function " << F.getName()
+         << " Hash: " << format("%016" PRIx64, FuncHashInfo.FunctionHash)
+         << "\n";
+      for (auto &[IndexPair, OpndHash] : *FuncHashInfo.IndexOperandHashMap) {
+        auto [InstIndex, OpndIndex] = IndexPair;
+        OS << "\tIgnored Operand Hash: " << format("%016" PRIx64, OpndHash)
+           << " at (" << InstIndex << "," << OpndIndex << ")\n";
+      }
+    } else {
+      OS << "Function " << F.getName() << " Hash: "
+         << format(
+                "%016" PRIx64,
+                StructuralHash(F, Options == StructuralHashOptions::Detailed))
+         << "\n";
+    }
   }
   return PreservedAnalyses::all();
 }
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index e9ed8b3c862b555..aa5142f33624099 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4241,6 +4241,10 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
   case LibFunc_exp2f:
   case LibFunc_exp2l:
     return Intrinsic::exp2;
+  case LibFunc_exp10:
+  case LibFunc_exp10f:
+  case LibFunc_exp10l:
+    return Intrinsic::exp10;
   case LibFunc_log:
   case LibFunc_logf:
   case LibFunc_logl:
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 37c443011719b62..cd5cf0443541fc8 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -76,6 +76,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
   case Intrinsic::cosh:
   case Intrinsic::tanh:
   case Intrinsic::exp:
+  case Intrinsic::exp10:
   case Intrinsic::exp2:
   case Intrinsic::log:
   case Intrinsic::log10:
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 759db6db60774c2..56abd03d6235415 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -60,8 +60,8 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
     uint64_t OldRes = Result;
     Result *= 10;
     Result += *Buffer-'0';
-    if (Result < OldRes) {  // Uh, oh, overflow detected!!!
-      LexError("constant bigger than 64 bits detected!");
+    if (Result < OldRes) { // overflow detected.
+      LexError("constant bigger than 64 bits detected");
       return 0;
     }
   }
@@ -75,8 +75,8 @@ uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
     Result *= 16;
     Result += hexDigitValue(*Buffer);
 
-    if (Result < OldRes) {   // Uh, oh, overflow detected!!!
-      LexError("constant bigger than 64 bits detected!");
+    if (Result < OldRes) { // overflow detected.
+      LexError("constant bigger than 64 bits detected");
       return 0;
     }
   }
@@ -99,7 +99,7 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End,
     Pair[1] += hexDigitValue(*Buffer);
   }
   if (Buffer != End)
-    LexError("constant bigger than 128 bits detected!");
+    LexError("constant bigger than 128 bits detected");
 }
 
 /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
@@ -118,7 +118,7 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
     Pair[0] += hexDigitValue(*Buffer);
   }
   if (Buffer != End)
-    LexError("constant bigger than 128 bits detected!");
+    LexError("constant bigger than 128 bits detected");
 }
 
 // UnEscapeLexed - Run through the specified buffer and change \xx codes to the
@@ -292,7 +292,7 @@ lltok::Kind LLLexer::LexDollar() {
         StrVal.assign(TokStart + 2, CurPtr - 1);
         UnEscapeLexed(StrVal);
         if (StringRef(StrVal).contains(0)) {
-          LexError("Null bytes are not allowed in names");
+          LexError("NUL character is not allowed in names");
           return lltok::Error;
         }
         return lltok::ComdatVar;
@@ -354,7 +354,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) {
 
   uint64_t Val = atoull(TokStart + 1, CurPtr);
   if ((unsigned)Val != Val)
-    LexError("invalid value number (too large)!");
+    LexError("invalid value number (too large)");
   UIntVal = unsigned(Val);
   return Token;
 }
@@ -375,7 +375,7 @@ lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
         StrVal.assign(TokStart+2, CurPtr-1);
         UnEscapeLexed(StrVal);
         if (StringRef(StrVal).contains(0)) {
-          LexError("Null bytes are not allowed in names");
+          LexError("NUL character is not allowed in names");
           return lltok::Error;
         }
         return Var;
@@ -410,7 +410,7 @@ lltok::Kind LLLexer::LexQuote() {
   if (CurPtr[0] == ':') {
     ++CurPtr;
     if (StringRef(StrVal).contains(0)) {
-      LexError("Null bytes are not allowed in names");
+      LexError("NUL character is not allowed in names");
       kind = lltok::Error;
     } else {
       kind = lltok::LabelStr;
@@ -492,7 +492,7 @@ lltok::Kind LLLexer::LexIdentifier() {
     uint64_t NumBits = atoull(StartChar, CurPtr);
     if (NumBits < IntegerType::MIN_INT_BITS ||
         NumBits > IntegerType::MAX_INT_BITS) {
-      LexError("bitwidth for integer type out of range!");
+      LexError("bitwidth for integer type out of range");
       return lltok::Error;
     }
     TyVal = IntegerType::get(Context, NumBits);
@@ -1122,7 +1122,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
     uint64_t Val = atoull(TokStart, CurPtr);
     ++CurPtr; // Skip the colon.
     if ((unsigned)Val != Val)
-      LexError("invalid value number (too large)!");
+      LexError("invalid value number (too large)");
     UIntVal = unsigned(Val);
     return lltok::LabelID;
   }
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 6a2372c97514087..8ddb2efb0e26c24 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -8525,7 +8525,7 @@ int LLParser::parseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
     return error(Loc, "base element of getelementptr must be sized");
 
   auto *STy = dyn_cast<StructType>(Ty);
-  if (STy && STy->containsScalableVectorType())
+  if (STy && STy->isScalableTy())
     return error(Loc, "getelementptr cannot target structure that contains "
                       "scalable vector type");
 
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 4aea059551dedce..446c98c8cecd884 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2165,8 +2165,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
     return Attribute::SanitizeNumericalStability;
   case bitc::ATTR_KIND_SANITIZE_REALTIME:
     return Attribute::SanitizeRealtime;
-  case bitc::ATTR_KIND_SANITIZE_REALTIME_UNSAFE:
-    return Attribute::SanitizeRealtimeUnsafe;
+  case bitc::ATTR_KIND_SANITIZE_REALTIME_BLOCKING:
+    return Attribute::SanitizeRealtimeBlocking;
   case bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING:
     return Attribute::SpeculativeLoadHardening;
   case bitc::ATTR_KIND_SWIFT_ERROR:
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index d9002149fba55af..ee9cc4b6e0c0ebd 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -853,8 +853,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
     return bitc::ATTR_KIND_SANITIZE_NUMERICAL_STABILITY;
   case Attribute::SanitizeRealtime:
     return bitc::ATTR_KIND_SANITIZE_REALTIME;
-  case Attribute::SanitizeRealtimeUnsafe:
-    return bitc::ATTR_KIND_SANITIZE_REALTIME_UNSAFE;
+  case Attribute::SanitizeRealtimeBlocking:
+    return bitc::ATTR_KIND_SANITIZE_REALTIME_BLOCKING;
   case Attribute::SpeculativeLoadHardening:
     return bitc::ATTR_KIND_SPECULATIVE_LOAD_HARDENING;
   case Attribute::SwiftError:
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index bf4c707cca06d5b..4ea71c9bd4ad4c0 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -141,18 +141,22 @@ using namespace llvm;
 // `object::PGOAnalysisMap::Features::decode(PgoAnalysisMapFeatures.getBits())`
 // succeeds.
 enum class PGOMapFeaturesEnum {
+  None,
   FuncEntryCount,
   BBFreq,
   BrProb,
+  All,
 };
 static cl::bits<PGOMapFeaturesEnum> PgoAnalysisMapFeatures(
     "pgo-analysis-map", cl::Hidden, cl::CommaSeparated,
-    cl::values(clEnumValN(PGOMapFeaturesEnum::FuncEntryCount,
-                          "func-entry-count", "Function Entry Count"),
-               clEnumValN(PGOMapFeaturesEnum::BBFreq, "bb-freq",
-                          "Basic Block Frequency"),
-               clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob",
-                          "Branch Probability")),
+    cl::values(
+        clEnumValN(PGOMapFeaturesEnum::None, "none", "Disable all options"),
+        clEnumValN(PGOMapFeaturesEnum::FuncEntryCount, "func-entry-count",
+                   "Function Entry Count"),
+        clEnumValN(PGOMapFeaturesEnum::BBFreq, "bb-freq",
+                   "Basic Block Frequency"),
+        clEnumValN(PGOMapFeaturesEnum::BrProb, "br-prob", "Branch Probability"),
+        clEnumValN(PGOMapFeaturesEnum::All, "all", "Enable all options")),
     cl::desc(
         "Enable extended information within the SHT_LLVM_BB_ADDR_MAP that is "
         "extracted from PGO related analysis."));
@@ -1367,9 +1371,28 @@ static uint32_t getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
 
 static llvm::object::BBAddrMap::Features
 getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges) {
-  return {PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::FuncEntryCount),
-          PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BBFreq),
-          PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BrProb),
+  // Ensure that the user has not passed in additional options while also
+  // specifying all or none.
+  if ((PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::None) ||
+       PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::All)) &&
+      popcount(PgoAnalysisMapFeatures.getBits()) != 1) {
+    MF.getFunction().getContext().emitError(
+        "-pgo-analysis-map can accept only all or none with no additional "
+        "values.");
+  }
+
+  bool NoFeatures = PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::None);
+  bool AllFeatures = PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::All);
+  bool FuncEntryCountEnabled =
+      AllFeatures || (!NoFeatures && PgoAnalysisMapFeatures.isSet(
+                                         PGOMapFeaturesEnum::FuncEntryCount));
+  bool BBFreqEnabled =
+      AllFeatures ||
+      (!NoFeatures && PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BBFreq));
+  bool BrProbEnabled =
+      AllFeatures ||
+      (!NoFeatures && PgoAnalysisMapFeatures.isSet(PGOMapFeaturesEnum::BrProb));
+  return {FuncEntryCountEnabled, BBFreqEnabled, BrProbEnabled,
           MF.hasBBSections() && NumMBBSectionRanges > 1};
 }
 
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 21d0d070c247f48..daad82d26da652e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -236,6 +236,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
   case MCCFIInstruction::OpNegateRAState:
     OutStreamer->emitCFINegateRAState(Loc);
     break;
+  case MCCFIInstruction::OpNegateRAStateWithPC:
+    OutStreamer->emitCFINegateRAStateWithPC(Loc);
+    break;
   case MCCFIInstruction::OpSameValue:
     OutStreamer->emitCFISameValue(Inst.getRegister(), Loc);
     break;
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 1dc278586f1178b..f8de13650680a89 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -645,11 +645,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
   // we don't have to split a block.  At worst we will be introducing 1 new
   // branch instruction, which is likely to be smaller than the 2
   // instructions that would be deleted in the merge.
-  MachineFunction *MF = MBB1->getParent();
-  bool OptForSize =
-      MF->getFunction().hasOptSize() ||
-      (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) &&
-       llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo));
+  bool OptForSize = llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) &&
+                    llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo);
   return EffectiveTailLen >= 2 && OptForSize &&
          (FullBlockTail1 || FullBlockTail2);
 }
diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp
index f5bedc7b8ecdfc2..4217ec6a1cca8a9 100644
--- a/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -260,6 +260,7 @@ void CFIInstrInserter::calculateOutgoingCFAInfo(MBBCFAInfo &MBBInfo) {
       case MCCFIInstruction::OpEscape:
       case MCCFIInstruction::OpWindowSave:
       case MCCFIInstruction::OpNegateRAState:
+      case MCCFIInstruction::OpNegateRAStateWithPC:
       case MCCFIInstruction::OpGnuArgsSize:
       case MCCFIInstruction::OpLabel:
         break;
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index cf5c35fe81b4c71..39fba1d0b527ef6 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -38,7 +38,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeEarlyIfConverterLegacyPass(Registry);
   initializeEarlyIfPredicatorPass(Registry);
   initializeEarlyMachineLICMPass(Registry);
-  initializeEarlyTailDuplicatePass(Registry);
+  initializeEarlyTailDuplicateLegacyPass(Registry);
   initializeExpandLargeDivRemLegacyPassPass(Registry);
   initializeExpandLargeFpConvertLegacyPassPass(Registry);
   initializeExpandMemCmpLegacyPassPass(Registry);
@@ -131,7 +131,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeStackProtectorPass(Registry);
   initializeStackSlotColoringPass(Registry);
   initializeStripDebugMachineModulePass(Registry);
-  initializeTailDuplicatePass(Registry);
+  initializeTailDuplicateLegacyPass(Registry);
   initializeTargetPassConfigPass(Registry);
   initializeTwoAddressInstructionLegacyPassPass(Registry);
   initializeTypePromotionLegacyPass(Registry);
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 67a35901511417f..5224a6c8d1a3738 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -612,7 +612,6 @@ bool CodeGenPrepare::_run(Function &F) {
       // bypassSlowDivision may create new BBs, but we don't want to reapply the
       // optimization to those blocks.
       BasicBlock *Next = BB->getNextNode();
-      // F.hasOptSize is already checked in the outer if statement.
       if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
         EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
       BB = Next;
@@ -2608,7 +2607,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
   // cold block.  This interacts with our handling for loads and stores to
   // ensure that we can fold all uses of a potential addressing computation
   // into their uses.  TODO: generalize this to work over profiling data
-  if (CI->hasFnAttr(Attribute::Cold) && !OptSize &&
+  if (CI->hasFnAttr(Attribute::Cold) &&
       !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
     for (auto &Arg : CI->args()) {
       if (!Arg->getType()->isPointerTy())
@@ -5505,9 +5504,7 @@ static bool FindAllMemoryUses(
       if (CI->hasFnAttr(Attribute::Cold)) {
         // If this is a cold call, we can sink the addressing calculation into
         // the cold path.  See optimizeCallInst
-        bool OptForSize =
-            OptSize || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
-        if (!OptForSize)
+        if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
           continue;
       }
 
@@ -7402,7 +7399,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
     SelectKind = TargetLowering::ScalarValSelect;
 
   if (TLI->isSelectSupported(SelectKind) &&
-      (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
+      (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) ||
        llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
     return false;
 
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 6d626de0b4e635e..1de01e402e59e60 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -852,8 +852,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
   // available load sizes.
   const bool IsUsedForZeroCmp =
       IsBCmp || isOnlyUsedInZeroEqualityComparison(CI);
-  bool OptForSize = CI->getFunction()->hasOptSize() ||
-                    llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
+  bool OptForSize = llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
   auto Options = TTI->enableMemCmpExpansion(OptForSize,
                                             IsUsedForZeroCmp);
   if (!Options) return false;
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 563a82644134528..5381dce58f9e65d 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2343,6 +2343,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                            MachineInstr::copyFlagsFromInstruction(CI));
     return true;
   }
+  case Intrinsic::sincos: {
+    ArrayRef<Register> VRegs = getOrCreateVRegs(CI);
+    MIRBuilder.buildFSincos(VRegs[0], VRegs[1],
+                            getOrCreateVReg(*CI.getArgOperand(0)),
+                            MachineInstr::copyFlagsFromInstruction(CI));
+    return true;
+  }
   case Intrinsic::fptosi_sat:
     MIRBuilder.buildFPTOSI_SAT(getOrCreateVReg(CI),
                                getOrCreateVReg(*CI.getArgOperand(0)));
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 722ceea29c951c9..dcbbb0871a8445e 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1619,13 +1619,6 @@ int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector,
   llvm_unreachable("Invalid boolean contents");
 }
 
-bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
-                            ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
-  const auto &F = MBB.getParent()->getFunction();
-  return F.hasOptSize() || F.hasMinSize() ||
-         llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
-}
-
 void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
                             LostDebugLocObserver *LocObserver,
                             SmallInstListTy &DeadInstChain) {
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 21a316cf99a217e..a0b6bf445fa8af1 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -890,8 +890,7 @@ float LiveIntervals::getSpillWeight(bool isDef, bool isUse,
   const auto *MF = MBB->getParent();
   // When optimizing for size we only consider the codesize impact of spilling
   // the register, not the runtime impact.
-  if (PSI && (MF->getFunction().hasOptSize() ||
-              llvm::shouldOptimizeForSize(MF, PSI, MBFI)))
+  if (PSI && llvm::shouldOptimizeForSize(MF, PSI, MBFI))
     return Weight;
   return Weight * MBFI->getBlockFreqRelativeToEntryBlock(MBB);
 }
diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp
index a57233cf37da014..bc8c59381a40e17 100644
--- a/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -244,6 +244,41 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
   return false;
 }
 
+LaneBitmask LiveRegMatrix::checkInterferenceLanes(SlotIndex Start,
+                                                  SlotIndex End,
+                                                  MCRegister PhysReg) {
+  // Construct artificial live range containing only one segment [Start, End).
+  VNInfo valno(0, Start);
+  LiveRange::Segment Seg(Start, End, &valno);
+  LiveRange LR;
+  LR.addSegment(Seg);
+
+  LaneBitmask InterferingLanes;
+
+  // Check for interference with that segment
+  for (MCRegUnitMaskIterator MCRU(PhysReg, TRI); MCRU.isValid(); ++MCRU) {
+    auto [Unit, Lanes] = *MCRU;
+    // LR is stack-allocated. LiveRegMatrix caches queries by a key that
+    // includes the address of the live range. If (for the same reg unit) this
+    // function is called twice, without any other query() calls in between
+    // (on heap-allocated LiveRanges) - which would invalidate the cached
+    // query - the LR address seen the second time may well be the same as
+    // that seen the first time, while the Start/End/valno may not - yet the
+    // same cached result would be fetched. To avoid that, we don't cache
+    // this query.
+    //
+    // FIXME: the usability of the Query API needs to be improved to avoid
+    // subtle bugs due to query identity. Avoiding caching, for example, would
+    // greatly simplify things.
+    LiveIntervalUnion::Query Q;
+    Q.reset(UserTag, LR, Matrix[Unit]);
+    if (Q.checkInterference())
+      InterferingLanes |= Lanes;
+  }
+
+  return InterferingLanes;
+}
+
 Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
   const LiveInterval *VRegInterval = nullptr;
   for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 0809f88fde56b1c..5a3806ce57335ae 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -238,6 +238,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
       .Case("window_save", MIToken::kw_cfi_window_save)
       .Case("negate_ra_sign_state",
             MIToken::kw_cfi_aarch64_negate_ra_sign_state)
+      .Case("negate_ra_sign_state_with_pc",
+            MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc)
       .Case("blockaddress", MIToken::kw_blockaddress)
       .Case("intrinsic", MIToken::kw_intrinsic)
       .Case("target-index", MIToken::kw_target_index)
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index 22547483a8a86be..3931da3eaae1d3a 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -96,6 +96,7 @@ struct MIToken {
     kw_cfi_undefined,
     kw_cfi_window_save,
     kw_cfi_aarch64_negate_ra_sign_state,
+    kw_cfi_aarch64_negate_ra_sign_state_with_pc,
     kw_blockaddress,
     kw_intrinsic,
     kw_target_index,
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 7aaa0f409d5ef9d..45847b5830da656 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -2576,6 +2576,10 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
   case MIToken::kw_cfi_aarch64_negate_ra_sign_state:
     CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
     break;
+  case MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc:
+    CFIIndex =
+        MF.addFrameInst(MCCFIInstruction::createNegateRAStateWithPC(nullptr));
+    break;
   case MIToken::kw_cfi_escape: {
     std::string Values;
     if (parseCFIEscapeValues(Values))
@@ -2931,6 +2935,7 @@ bool MIParser::parseMachineOperand(const unsigned OpCode, const unsigned OpIdx,
   case MIToken::kw_cfi_undefined:
   case MIToken::kw_cfi_window_save:
   case MIToken::kw_cfi_aarch64_negate_ra_sign_state:
+  case MIToken::kw_cfi_aarch64_negate_ra_sign_state_with_pc:
     return parseCFIOperand(Dest);
   case MIToken::kw_blockaddress:
     return parseBlockAddressOperand(Dest);
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index dd5220b4599f959..d1dced9ef28dca5 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -2189,9 +2189,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
   // i.e. when the layout predecessor does not fallthrough to the loop header.
   // In practice this never happens though: there always seems to be a preheader
   // that can fallthrough and that is also placed before the header.
-  bool OptForSize = F->getFunction().hasOptSize() ||
-                    llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get());
-  if (OptForSize)
+  if (llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get()))
     return L.getHeader();
 
   MachineBasicBlock *OldTop = nullptr;
@@ -3511,7 +3509,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
   initTailDupThreshold();
 
   const bool OptForSize =
-      MF.getFunction().hasOptSize() ||
       llvm::shouldOptimizeForSize(&MF, PSI, &MBFI->getMBFI());
   // Determine whether to use ext-tsp for perf/size optimization. The method
   // is beneficial only for instances with at least 3 basic blocks and it can be
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 5bfc1d63ac37640..141cc1f35d66c3c 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -77,9 +77,6 @@ class MachineCombiner : public MachineFunctionPass {
 
   TargetSchedModel TSchedModel;
 
-  /// True if optimizing for code size.
-  bool OptSize = false;
-
 public:
   static char ID;
   MachineCombiner() : MachineFunctionPass(ID) {
@@ -571,7 +568,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
   SparseSet<LiveRegUnit> RegUnits;
   RegUnits.setUniverse(TRI->getNumRegUnits());
 
-  bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
+  bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
 
   bool DoRegPressureReduce =
       TII->shouldReduceRegisterPressure(MBB, &RegClassInfo);
@@ -733,7 +730,6 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
          &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
          nullptr;
   TraceEnsemble = nullptr;
-  OptSize = MF.getFunction().hasOptSize();
   RegClassInfo.runOnMachineFunction(MF);
 
   LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 0d78c2cafbaf63c..c1bd0bb5b7162e1 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1535,19 +1535,16 @@ bool MachineInstr::isDereferenceableInvariantLoad() const {
   return true;
 }
 
-/// isConstantValuePHI - If the specified instruction is a PHI that always
-/// merges together the same virtual register, return the register, otherwise
-/// return 0.
-unsigned MachineInstr::isConstantValuePHI() const {
+Register MachineInstr::isConstantValuePHI() const {
   if (!isPHI())
-    return 0;
+    return {};
   assert(getNumOperands() >= 3 &&
          "It's illegal to have a PHI without source operands");
 
   Register Reg = getOperand(1).getReg();
   for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
     if (getOperand(i).getReg() != Reg)
-      return 0;
+      return {};
   return Reg;
 }
 
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 793ad75759ccb86..7ea07862b839d02 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -391,12 +391,6 @@ bool MachineLICMImpl::run(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
   SchedModel.init(&ST);
 
-  // FIXME: Remove this assignment or convert to an assert? (dead variable PreRegAlloc)
-  // MachineLICM and PostRAMachineLICM were distinguished by introducing
-  // EarlyMachineLICM and MachineLICM respectively to avoid "using an unreliable
-  // MRI::isSSA() check to determine whether register allocation has happened"
-  // (See 4a7c8e7).
-  PreRegAlloc = MRI->isSSA();
   HasProfileData = MF.getFunction().hasProfileData();
 
   if (PreRegAlloc)
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index c0e004555de9598..d9e5e9d9d1e41f0 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -768,6 +768,11 @@ static void printCFI(raw_ostream &OS, const MCCFIInstruction &CFI,
     if (MCSymbol *Label = CFI.getLabel())
       MachineOperand::printSymbol(OS, *Label);
     break;
+  case MCCFIInstruction::OpNegateRAStateWithPC:
+    OS << "negate_ra_sign_state_with_pc ";
+    if (MCSymbol *Label = CFI.getLabel())
+      MachineOperand::printSymbol(OS, *Label);
+    break;
   default:
     // TODO: Print the other CFI Operations.
     OS << "<unserializable cfi directive>";
diff --git a/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index c7a673b12d8c509..f0a136751bbffaa 100644
--- a/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -201,7 +201,7 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB,
 
   // See if the PHI node can be merged to a single value.  This can happen in
   // loop cases when we get a PHI of itself and one other value.
-  if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+  if (Register ConstVal = InsertedPHI->isConstantValuePHI()) {
     InsertedPHI->eraseFromParent();
     return ConstVal;
   }
diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp
index 53bed7397d0992e..4d458f2c2e24b4d 100644
--- a/llvm/lib/CodeGen/MachineSizeOpts.cpp
+++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -28,6 +28,8 @@ bool llvm::shouldOptimizeForSize(const MachineFunction *MF,
                                  ProfileSummaryInfo *PSI,
                                  const MachineBlockFrequencyInfo *MBFI,
                                  PGSOQueryType QueryType) {
+  if (MF->getFunction().hasOptSize())
+    return true;
   return shouldFuncOptimizeForSizeImpl(MF, PSI, MBFI, QueryType);
 }
 
@@ -36,6 +38,8 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
                                  const MachineBlockFrequencyInfo *MBFI,
                                  PGSOQueryType QueryType) {
   assert(MBB);
+  if (MBB->getParent()->getFunction().hasOptSize())
+    return true;
   return shouldOptimizeForSizeImpl(MBB, PSI, MBFI, QueryType);
 }
 
@@ -44,7 +48,9 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
                                  MBFIWrapper *MBFIW,
                                  PGSOQueryType QueryType) {
   assert(MBB);
-  if (!PSI || !MBFIW)
+  if (MBB->getParent()->getFunction().hasOptSize())
+    return true;
+  if (!MBFIW)
     return false;
   BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB);
   return shouldOptimizeForSizeImpl(BlockFreq, PSI, &MBFIW->getMBFI(),
diff --git a/llvm/lib/CodeGen/MachineStripDebug.cpp b/llvm/lib/CodeGen/MachineStripDebug.cpp
index 6128248a028e3e0..ea291f64bff4321 100644
--- a/llvm/lib/CodeGen/MachineStripDebug.cpp
+++ b/llvm/lib/CodeGen/MachineStripDebug.cpp
@@ -50,7 +50,7 @@ struct StripDebugMachineModule : public ModulePass {
         continue;
       MachineFunction &MF = *MaybeMF;
       for (MachineBasicBlock &MBB : MF) {
-        for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+        for (MachineInstr &MI : llvm::make_early_inc_range(MBB.instrs())) {
           if (MI.isDebugInstr()) {
             // FIXME: We should remove all of them. However, AArch64 emits an
             //        invalid `DBG_VALUE $lr` with only one operand instead of
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index 61341e1f2d04ce8..55b0eb71ac11fcd 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -431,7 +431,7 @@ PreservedAnalyses SelectOptimizeImpl::run(Function &F,
   BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);
 
   // When optimizing for size, selects are preferable over branches.
-  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
+  if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
     return PreservedAnalyses::all();
 
   LI = &FAM.getResult<LoopAnalysis>(F);
@@ -467,7 +467,7 @@ bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
   TSchedModel.init(TSI);
 
   // When optimizing for size, selects are preferable over branches.
-  if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI))
+  if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
     return false;
 
   return optimizeSelects(F);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ad2d2ede302af84..ceaf5d664131c3a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7353,6 +7353,26 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
     return R;
 
+  // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
+  // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
+  SDValue X, Y, Z, NotY;
+  for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
+    if (sd_match(N,
+                 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
+        sd_match(NotY, m_Not(m_Value(Y))) &&
+        (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
+      return DAG.getNode(ISD::AND, DL, VT, X,
+                         DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
+
+  // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z)))
+  for (unsigned Opc : {ISD::ROTL, ISD::ROTR})
+    if (sd_match(N, m_And(m_Value(X),
+                          m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) &&
+        sd_match(NotY, m_Not(m_Value(Y))) &&
+        (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
+      return DAG.getNode(ISD::AND, DL, VT, X,
+                         DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT));
+
   // Masking the negated extension of a boolean is just the zero-extended
   // boolean:
   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index e0a03383358b76a..6ba12cfb8c51481 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3714,6 +3714,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     }
     break;
   }
+  case ISD::FSINCOS: {
+    if (isSinCosLibcallAvailable(Node, TLI))
+      break;
+    EVT VT = Node->getValueType(0);
+    SDValue Op = Node->getOperand(0);
+    SDNodeFlags Flags = Node->getFlags();
+    Tmp1 = DAG.getNode(ISD::FSIN, dl, VT, Op, Flags);
+    Tmp2 = DAG.getNode(ISD::FCOS, dl, VT, Op, Flags);
+    Results.append({Tmp1, Tmp2});
+    break;
+  }
   case ISD::FMAD:
     llvm_unreachable("Illegal fmad should never be formed");
 
@@ -4364,6 +4375,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Results.push_back(DAG.getNode(ISD::FP_TO_SINT, dl, ResVT, RoundNode));
     break;
   }
+  case ISD::ADDRSPACECAST:
+    Results.push_back(DAG.UnrollVectorOp(Node));
+    break;
   case ISD::GLOBAL_OFFSET_TABLE:
   case ISD::GlobalAddress:
   case ISD::GlobalTLSAddress:
@@ -5586,6 +5600,16 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     Results.push_back(Tmp2.getValue(1));
     break;
   }
+  case ISD::FSINCOS: {
+    Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+    Tmp2 = DAG.getNode(ISD::FSINCOS, dl, DAG.getVTList(NVT, NVT), Tmp1,
+                       Node->getFlags());
+    Tmp3 = DAG.getIntPtrConstant(0, dl, /*isTarget=*/true);
+    for (unsigned ResNum = 0; ResNum < Node->getNumValues(); ResNum++)
+      Results.push_back(
+          DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2.getValue(ResNum), Tmp3));
+    break;
+  }
   case ISD::FFLOOR:
   case ISD::FCEIL:
   case ISD::FRINT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 73c258f0f6f18c2..fa2731ff7dbda75 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -129,6 +129,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FLDEXP:
     case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
     case ISD::FFREXP:        R = SoftenFloatRes_FFREXP(N); break;
+    case ISD::FSINCOS:       R = SoftenFloatRes_FSINCOS(N); break;
     case ISD::STRICT_FREM:
     case ISD::FREM:        R = SoftenFloatRes_FREM(N); break;
     case ISD::STRICT_FRINT:
@@ -774,6 +775,45 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) {
   return ReturnVal;
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatRes_FSINCOS(SDNode *N) {
+  assert(!N->isStrictFPOpcode() && "strictfp not implemented for fsincos");
+  EVT VT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getFSINCOS(VT);
+  // Softens FSINCOS into a libcall; bail out if no libcall name exists for LC.
+  if (!TLI.getLibcallName(LC))
+    return SDValue();
+  // One stack temporary per result; both are handed to the libcall below.
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue StackSlotSin = DAG.CreateStackTemporary(NVT);
+  SDValue StackSlotCos = DAG.CreateStackTemporary(NVT);
+
+  SDLoc DL(N);
+
+  TargetLowering::MakeLibCallOptions CallOptions;
+  std::array Ops{GetSoftenedFloat(N->getOperand(0)), StackSlotSin,
+                 StackSlotCos};
+  std::array OpsVT{VT, StackSlotSin.getValueType(),
+                   StackSlotCos.getValueType()};
+
+  // TODO: setTypeListBeforeSoften can't properly express multiple return types,
+  // but since both returns have the same type for sincos it should be okay.
+  CallOptions.setTypeListBeforeSoften({OpsVT}, VT, true);
+
+  auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, DL,
+                                            /*Chain=*/SDValue());
+  // Reads a result back from its stack slot, chained on the libcall's chain.
+  auto CreateStackLoad = [&, Chain = Chain](SDValue StackSlot) {
+    int FrameIdx = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+    auto PtrInfo =
+        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+    return DAG.getLoad(NVT, DL, Chain, StackSlot, PtrInfo);
+  };
+  SetSoftenedFloat(SDValue(N, 0), CreateStackLoad(StackSlotSin));
+  SetSoftenedFloat(SDValue(N, 1), CreateStackLoad(StackSlotCos));
+  // Both results are registered above, so a null SDValue is returned.
+  return SDValue();
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
   return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
                                                RTLIB::REM_F32,
@@ -2704,6 +2744,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FLDEXP:     R = PromoteFloatRes_ExpOp(N); break;
     case ISD::FFREXP:     R = PromoteFloatRes_FFREXP(N); break;
 
+    case ISD::FSINCOS:
+      R = PromoteFloatRes_FSINCOS(N);
+      break;
+
     case ISD::FP_ROUND:   R = PromoteFloatRes_FP_ROUND(N); break;
     case ISD::STRICT_FP_ROUND:
       R = PromoteFloatRes_STRICT_FP_ROUND(N);
@@ -2899,6 +2943,20 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FFREXP(SDNode *N) {
   return Res;
 }
 
+SDValue DAGTypeLegalizer::PromoteFloatRes_FSINCOS(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Op = GetPromotedFloat(N->getOperand(0));
+  SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), {NVT, NVT}, Op);
+  // Res is N's opcode on the promoted operand; pair up results one-to-one.
+  for (unsigned ResNum = 0, NumValues = N->getNumValues(); ResNum < NumValues;
+       ++ResNum) {
+    SetPromotedFloat(SDValue(N, ResNum), Res.getValue(ResNum));
+  }
+  // Results are registered via SetPromotedFloat; return a null SDValue.
+  return SDValue();
+}
+
 // Explicit operation to reduce precision.  Reduce the value to half precision
 // and promote it back to the legal type.
 SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) {
@@ -3148,6 +3206,10 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
 
   case ISD::FFREXP:      R = SoftPromoteHalfRes_FFREXP(N); break;
 
+  case ISD::FSINCOS:
+    R = SoftPromoteHalfRes_FSINCOS(N);
+    break;
+
   case ISD::LOAD:        R = SoftPromoteHalfRes_LOAD(N); break;
   case ISD::ATOMIC_LOAD:
     R = SoftPromoteHalfRes_ATOMIC_LOAD(N);
@@ -3304,6 +3366,27 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FFREXP(SDNode *N) {
   return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
 }
 
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FSINCOS(SDNode *N) {
+  EVT OVT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+  SDValue Op = GetSoftPromotedHalf(N->getOperand(0));
+  SDLoc dl(N);
+
+  // Promote to the larger FP type.
+  Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op);
+  SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(NVT, NVT), Op);
+
+  // Convert back to FP16 as an integer.
+  ISD::NodeType Truncate = GetPromotionOpcode(NVT, OVT);
+  for (unsigned ResNum = 0, NumValues = N->getNumValues(); ResNum < NumValues;
+       ++ResNum) {
+    SDValue Trunc = DAG.getNode(Truncate, dl, MVT::i16, Res.getValue(ResNum));
+    SetSoftPromotedHalf(SDValue(N, ResNum), Trunc);
+  }
+  // Results are registered via SetSoftPromotedHalf; return a null SDValue.
+  return SDValue();
+}
+
 SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
   EVT RVT = N->getValueType(0);
   bool IsStrict = N->isStrictFPOpcode();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 868da25ca8cb474..8d3458aaab9f865 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -597,6 +597,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftenFloatRes_FPOW(SDNode *N);
   SDValue SoftenFloatRes_ExpOp(SDNode *N);
   SDValue SoftenFloatRes_FFREXP(SDNode *N);
+  SDValue SoftenFloatRes_FSINCOS(SDNode *N);
   SDValue SoftenFloatRes_FREEZE(SDNode *N);
   SDValue SoftenFloatRes_FREM(SDNode *N);
   SDValue SoftenFloatRes_FRINT(SDNode *N);
@@ -744,6 +745,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue PromoteFloatRes_FMAD(SDNode *N);
   SDValue PromoteFloatRes_ExpOp(SDNode *N);
   SDValue PromoteFloatRes_FFREXP(SDNode *N);
+  SDValue PromoteFloatRes_FSINCOS(SDNode *N);
   SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
   SDValue PromoteFloatRes_STRICT_FP_ROUND(SDNode *N);
   SDValue PromoteFloatRes_LOAD(SDNode *N);
@@ -792,6 +794,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftPromoteHalfRes_FMAD(SDNode *N);
   SDValue SoftPromoteHalfRes_ExpOp(SDNode *N);
   SDValue SoftPromoteHalfRes_FFREXP(SDNode *N);
+  SDValue SoftPromoteHalfRes_FSINCOS(SDNode *N);
   SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
   SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
   SDValue SoftPromoteHalfRes_ATOMIC_LOAD(SDNode *N);
@@ -863,7 +866,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N);
 
   SDValue ScalarizeVecRes_FIX(SDNode *N);
-  SDValue ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo);
+  SDValue ScalarizeVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo);
 
   // Vector Operand Scalarization: <1 x ty> -> ty.
   bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -917,7 +920,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo, SDValue &Hi);
-  void SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo, SDValue &Lo,
+                                         SDValue &Hi);
   void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -1068,6 +1072,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_ExpOp(SDNode *N);
   SDValue WidenVecRes_Unary(SDNode *N);
   SDValue WidenVecRes_InregOp(SDNode *N);
+  SDValue WidenVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo);
+  void ReplaceOtherWidenResults(SDNode *N, SDNode *WidenNode,
+                                unsigned WidenResNo);
 
   // Widen Vector Operand.
   bool WidenVectorOperand(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index a8042fc3e7a69a2..c80da28b3dc34d5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -452,6 +452,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::UMULO:
   case ISD::FCANONICALIZE:
   case ISD::FFREXP:
+  case ISD::FSINCOS:
   case ISD::SADDSAT:
   case ISD::UADDSAT:
   case ISD::SSUBSAT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 50e2a923699c8ad..5409ae7d9671cb0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -130,7 +130,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
     R = ScalarizeVecRes_ADDRSPACECAST(N);
     break;
   case ISD::FFREXP:
-    R = ScalarizeVecRes_FFREXP(N, ResNo);
+  case ISD::FSINCOS:
+    R = ScalarizeVecRes_UnaryOpWithTwoResults(N, ResNo);
     break;
   case ISD::ADD:
   case ISD::AND:
@@ -276,7 +277,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
                      Op2, N->getFlags());
 }
 
-SDValue DAGTypeLegalizer::ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo) {
+SDValue
+DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithTwoResults(SDNode *N,
+                                                        unsigned ResNo) {
   assert(N->getValueType(0).getVectorNumElements() == 1 &&
          "Unexpected vector type!");
   SDValue Elt = GetScalarizedVector(N->getOperand(0));
@@ -1253,7 +1256,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
     SplitVecRes_ADDRSPACECAST(N, Lo, Hi);
     break;
   case ISD::FFREXP:
-    SplitVecRes_FFREXP(N, ResNo, Lo, Hi);
+  case ISD::FSINCOS:
+    SplitVecRes_UnaryOpWithTwoResults(N, ResNo, Lo, Hi);
     break;
 
   case ISD::ANY_EXTEND:
@@ -2615,8 +2619,10 @@ void DAGTypeLegalizer::SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo,
   Hi = DAG.getAddrSpaceCast(dl, HiVT, Hi, SrcAS, DestAS);
 }
 
-void DAGTypeLegalizer::SplitVecRes_FFREXP(SDNode *N, unsigned ResNo,
-                                          SDValue &Lo, SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_UnaryOpWithTwoResults(SDNode *N,
+                                                         unsigned ResNo,
+                                                         SDValue &Lo,
+                                                         SDValue &Hi) {
   SDLoc dl(N);
   auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
   auto [LoVT1, HiVT1] = DAG.GetSplitDestVTs(N->getValueType(1));
@@ -4429,6 +4435,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_HISTOGRAM(SDNode *N) {
 //  Result Vector Widening
 //===----------------------------------------------------------------------===//
 
+void DAGTypeLegalizer::ReplaceOtherWidenResults(SDNode *N, SDNode *WidenNode,
+                                                unsigned WidenResNo) {
+  unsigned NumResults = N->getNumValues();
+  for (unsigned ResNo = 0; ResNo < NumResults; ResNo++) {
+    if (ResNo == WidenResNo)
+      continue; // The result at WidenResNo is not touched here.
+    EVT ResVT = N->getValueType(ResNo);
+    if (getTypeAction(ResVT) == TargetLowering::TypeWidenVector) {
+      SetWidenedVector(SDValue(N, ResNo), SDValue(WidenNode, ResNo));
+    } else { // ResVT is not widened: use the ResVT subvector at index 0.
+      SDLoc DL(N);
+      SDValue ResVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT,
+                                   SDValue(WidenNode, ResNo),
+                                   DAG.getVectorIdxConstant(0, DL));
+      ReplaceValueWith(SDValue(N, ResNo), ResVal);
+    }
+  }
+}
+
 void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG));
 
@@ -4448,6 +4473,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) &&
         TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
       Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
+      if (N->getNumValues() > 1)
+        ReplaceOtherWidenResults(N, Res.getNode(), ResNo);
       return true;
     }
     return false;
@@ -4752,6 +4779,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VP_FSHR:
     Res = WidenVecRes_Ternary(N);
     break;
+  case ISD::FFREXP:
+  case ISD::FSINCOS: {
+    if (!unrollExpandedOp())
+      Res = WidenVecRes_UnaryOpWithTwoResults(N, ResNo);
+    break;
+  }
   }
 
   // If Res is null, the sub-method took care of registering the result.
@@ -5500,6 +5533,32 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
                      WidenVT, WidenLHS, DAG.getValueType(ExtVT));
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_UnaryOpWithTwoResults(SDNode *N,
+                                                            unsigned ResNo) {
+  EVT VT0 = N->getValueType(0);
+  EVT VT1 = N->getValueType(1);
+  // Both results share one widened element count, so the counts must match.
+  assert(VT0.isVector() && VT1.isVector() &&
+         VT0.getVectorElementCount() == VT1.getVectorElementCount() &&
+         "expected both results to be vectors of matching element count");
+
+  LLVMContext &Ctx = *DAG.getContext();
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  // The widened element count comes from the result being widened (ResNo).
+  EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(ResNo));
+  ElementCount WidenEC = WidenVT.getVectorElementCount();
+
+  EVT WidenVT0 = EVT::getVectorVT(Ctx, VT0.getVectorElementType(), WidenEC);
+  EVT WidenVT1 = EVT::getVectorVT(Ctx, VT1.getVectorElementType(), WidenEC);
+
+  SDNode *WidenNode =
+      DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT0, WidenVT1}, InOp)
+          .getNode();
+  // Hook up the other result, then hand back the widened value for ResNo.
+  ReplaceOtherWidenResults(N, WidenNode, ResNo);
+  return SDValue(WidenNode, ResNo);
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
   SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
   return GetWidenedVector(WidenVec);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0770355ec18c0b5..5403d787861d46e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1370,8 +1370,7 @@ SelectionDAG::~SelectionDAG() {
 }
 
 bool SelectionDAG::shouldOptForSize() const {
-  return MF->getFunction().hasOptSize() ||
-      llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI);
+  return llvm::shouldOptimizeForSize(FLI->MBB->getBasicBlock(), PSI, BFI);
 }
 
 void SelectionDAG::allnodes_clear() {
@@ -12534,8 +12533,15 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
       Scalars1.push_back(EltOp.getValue(1));
     }
 
-    SDValue Vec0 = getBuildVector(VT, dl, Scalars0);
-    SDValue Vec1 = getBuildVector(VT1, dl, Scalars1);
+    for (; i < ResNE; ++i) {
+      Scalars0.push_back(getUNDEF(EltVT));
+      Scalars1.push_back(getUNDEF(EltVT1));
+    }
+
+    EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE);
+    EVT VecVT1 = EVT::getVectorVT(*getContext(), EltVT1, ResNE);
+    SDValue Vec0 = getBuildVector(VecVT, dl, Scalars0);
+    SDValue Vec1 = getBuildVector(VecVT1, dl, Scalars1);
     return getMergeValues({Vec0, Vec1}, dl);
   }
 
@@ -12584,6 +12590,14 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
       Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
                                 Operands[0],
                                 getValueType(ExtVT)));
+      break;
+    }
+    case ISD::ADDRSPACECAST: {
+      const auto *ASC = cast<AddrSpaceCastSDNode>(N);
+      Scalars.push_back(getAddrSpaceCast(dl, EltVT, Operands[0],
+                                         ASC->getSrcAddressSpace(),
+                                         ASC->getDestAddressSpace()));
+      break;
     }
     }
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8450553743074c3..203e80e36b46d9e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6936,12 +6936,24 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                              getValue(I.getArgOperand(0)),
                              getValue(I.getArgOperand(1)), Flags));
     return;
+  case Intrinsic::sincos:
   case Intrinsic::frexp: {
+    unsigned Opcode;
+    switch (Intrinsic) {
+    default:
+      llvm_unreachable("unexpected intrinsic");
+    case Intrinsic::sincos:
+      Opcode = ISD::FSINCOS;
+      break;
+    case Intrinsic::frexp:
+      Opcode = ISD::FFREXP;
+      break;
+    }
     SmallVector<EVT, 2> ValueVTs;
     ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
     SDVTList VTs = DAG.getVTList(ValueVTs);
-    setValue(&I,
-             DAG.getNode(ISD::FFREXP, sdl, VTs, getValue(I.getArgOperand(0))));
+    setValue(
+        &I, DAG.getNode(Opcode, sdl, VTs, getValue(I.getArgOperand(0)), Flags));
     return;
   }
   case Intrinsic::arithmetic_fence: {
diff --git a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 232e5e2bb886dfd..f8ab44124b3ae8e 100644
--- a/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -242,13 +242,12 @@ bool ShadowStackGCLoweringImpl::doInitialization(Module &M) {
   //   void *Roots[];          // Stack roots (in-place array, so we pretend).
   // };
 
-  StackEntryTy = StructType::create(M.getContext(), "gc_stackentry");
+  PointerType *StackEntryPtrTy = PointerType::getUnqual(M.getContext());
 
   EltTys.clear();
-  EltTys.push_back(PointerType::getUnqual(StackEntryTy));
+  EltTys.push_back(StackEntryPtrTy);
   EltTys.push_back(FrameMapPtrTy);
-  StackEntryTy->setBody(EltTys);
-  PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
+  StackEntryTy = StructType::create(EltTys, "gc_stackentry");
 
   // Get the root chain if it already exists.
   Head = M.getGlobalVariable("llvm_gc_root_chain");
diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp
index 25f20d9c899bb07..b698ca675b65e2d 100644
--- a/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/llvm/lib/CodeGen/TailDuplication.cpp
@@ -12,13 +12,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/TailDuplication.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MBFIWrapper.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/TailDuplicator.h"
+#include "llvm/IR/Analysis.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/PassRegistry.h"
@@ -29,13 +32,13 @@ using namespace llvm;
 
 namespace {
 
-class TailDuplicateBase : public MachineFunctionPass {
+class TailDuplicateBaseLegacy : public MachineFunctionPass {
   TailDuplicator Duplicator;
   std::unique_ptr<MBFIWrapper> MBFIW;
   bool PreRegAlloc;
 public:
-  TailDuplicateBase(char &PassID, bool PreRegAlloc)
-    : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}
+  TailDuplicateBaseLegacy(char &PassID, bool PreRegAlloc)
+      : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {}
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
@@ -47,19 +50,19 @@ class TailDuplicateBase : public MachineFunctionPass {
   }
 };
 
-class TailDuplicate : public TailDuplicateBase {
+class TailDuplicateLegacy : public TailDuplicateBaseLegacy {
 public:
   static char ID;
-  TailDuplicate() : TailDuplicateBase(ID, false) {
-    initializeTailDuplicatePass(*PassRegistry::getPassRegistry());
+  TailDuplicateLegacy() : TailDuplicateBaseLegacy(ID, false) {
+    initializeTailDuplicateLegacyPass(*PassRegistry::getPassRegistry());
   }
 };
 
-class EarlyTailDuplicate : public TailDuplicateBase {
+class EarlyTailDuplicateLegacy : public TailDuplicateBaseLegacy {
 public:
   static char ID;
-  EarlyTailDuplicate() : TailDuplicateBase(ID, true) {
-    initializeEarlyTailDuplicatePass(*PassRegistry::getPassRegistry());
+  EarlyTailDuplicateLegacy() : TailDuplicateBaseLegacy(ID, true) {
+    initializeEarlyTailDuplicateLegacyPass(*PassRegistry::getPassRegistry());
   }
 
   MachineFunctionProperties getClearedProperties() const override {
@@ -70,17 +73,18 @@ class EarlyTailDuplicate : public TailDuplicateBase {
 
 } // end anonymous namespace
 
-char TailDuplicate::ID;
-char EarlyTailDuplicate::ID;
+char TailDuplicateLegacy::ID;
+char EarlyTailDuplicateLegacy::ID;
 
-char &llvm::TailDuplicateID = TailDuplicate::ID;
-char &llvm::EarlyTailDuplicateID = EarlyTailDuplicate::ID;
+char &llvm::TailDuplicateLegacyID = TailDuplicateLegacy::ID;
+char &llvm::EarlyTailDuplicateLegacyID = EarlyTailDuplicateLegacy::ID;
 
-INITIALIZE_PASS(TailDuplicate, DEBUG_TYPE, "Tail Duplication", false, false)
-INITIALIZE_PASS(EarlyTailDuplicate, "early-tailduplication",
+INITIALIZE_PASS(TailDuplicateLegacy, DEBUG_TYPE, "Tail Duplication", false,
+                false)
+INITIALIZE_PASS(EarlyTailDuplicateLegacy, "early-tailduplication",
                 "Early Tail Duplication", false, false)
 
-bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
+bool TailDuplicateBaseLegacy::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
@@ -100,3 +104,36 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
 
   return MadeChange;
 }
+
+template <typename DerivedT, bool PreRegAlloc>
+PreservedAnalyses TailDuplicatePassBase<DerivedT, PreRegAlloc>::run(
+    MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) {
+  MFPropsModifier _(static_cast<DerivedT &>(*this), MF);
+  // optnone functions are left untouched and all analyses are preserved.
+  if (MF.getFunction().hasOptNone())
+    return PreservedAnalyses::all();
+  // MBFI is only computed when a cached PSI with a profile summary exists.
+  auto *MBPI = &MFAM.getResult<MachineBranchProbabilityAnalysis>(MF);
+  auto *PSI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
+                  .getCachedResult<ProfileSummaryAnalysis>(
+                      *MF.getFunction().getParent());
+  auto *MBFI = (PSI && PSI->hasProfileSummary()
+                    ? &MFAM.getResult<MachineBlockFrequencyAnalysis>(MF)
+                    : nullptr);
+  if (MBFI)
+    MBFIW = std::make_unique<MBFIWrapper>(*MBFI);
+  // Set up the duplicator (LayoutMode off) and rerun it while it returns true.
+  TailDuplicator Duplicator;
+  Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI,
+                    /*LayoutMode=*/false);
+  bool MadeChange = false;
+  while (Duplicator.tailDuplicateBlocks())
+    MadeChange = true;
+  // With no change made, every analysis is reported as preserved.
+  if (!MadeChange)
+    return PreservedAnalyses::all();
+  return getMachineFunctionPassPreservedAnalyses();
+}
+
+template class llvm::TailDuplicatePassBase<TailDuplicatePass, false>;
+template class llvm::TailDuplicatePassBase<EarlyTailDuplicatePass, true>;
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index c5fa4e6211a6310..3f2e1511d403a01 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -586,13 +586,11 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   // duplicate only one, because one branch instruction can be eliminated to
   // compensate for the duplication.
   unsigned MaxDuplicateCount;
-  bool OptForSize = MF->getFunction().hasOptSize() ||
-                    llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI);
   if (TailDupSize == 0)
     MaxDuplicateCount = TailDuplicateSize;
   else
     MaxDuplicateCount = TailDupSize;
-  if (OptForSize)
+  if (llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI))
     MaxDuplicateCount = 1;
 
   // If the block to be duplicated ends in an unanalyzable fallthrough, don't
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 7a28f7892cbf310..5bcde0e1bbec88b 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -773,8 +773,9 @@ void TargetLoweringBase::initActions() {
     setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);
 
     // These library functions default to expand.
-    setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, VT,
-                       Expand);
+    setOperationAction(
+        {ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP, ISD::FSINCOS}, VT,
+        Expand);
 
     // These operations default to expand for vector types.
     if (VT.isVector())
@@ -1633,7 +1634,6 @@ bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI,
   // performed in findJumpTable() in SelectionDAGBuiler and
   // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
   const bool OptForSize =
-      SI->getParent()->getParent()->hasOptSize() ||
       llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI);
   const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
   const unsigned MaxJumpTableSize = getMaximumJumpTableSize();
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 12225c9946e9fc7..aff74104006e5a7 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -290,10 +290,10 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
   if (StandardID == &BranchFolderPassID)
     return applyDisable(TargetID, DisableBranchFold);
 
-  if (StandardID == &TailDuplicateID)
+  if (StandardID == &TailDuplicateLegacyID)
     return applyDisable(TargetID, DisableTailDuplicate);
 
-  if (StandardID == &EarlyTailDuplicateID)
+  if (StandardID == &EarlyTailDuplicateLegacyID)
     return applyDisable(TargetID, DisableEarlyTailDup);
 
   if (StandardID == &MachineBlockPlacementID)
@@ -1279,7 +1279,7 @@ void TargetPassConfig::addMachinePasses() {
 /// Add passes that optimize machine instructions in SSA form.
 void TargetPassConfig::addMachineSSAOptimization() {
   // Pre-ra tail duplication.
-  addPass(&EarlyTailDuplicateID);
+  addPass(&EarlyTailDuplicateLegacyID);
 
   // Optimize PHIs before DCE: removing dead PHI cycles may make more
   // instructions dead.
@@ -1507,7 +1507,7 @@ void TargetPassConfig::addMachineLateOptimization() {
   // performance for targets that require Structured Control Flow.
   // In addition it can also make CFG irreducible. Thus we disable it.
   if (!TM->requiresStructuredCFG())
-    addPass(&TailDuplicateID);
+    addPass(&TailDuplicateLegacyID);
 
   // Copy propagation.
   addPass(&MachineCopyPropagationID);
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 46253b1743d97e0..26a12512c87be07 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/LiveDebugVariables.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveStacks.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -203,6 +204,7 @@ class VirtRegRewriter : public MachineFunctionPass {
   MachineRegisterInfo *MRI = nullptr;
   SlotIndexes *Indexes = nullptr;
   LiveIntervals *LIS = nullptr;
+  LiveRegMatrix *LRM = nullptr;
   VirtRegMap *VRM = nullptr;
   LiveDebugVariables *DebugVars = nullptr;
   DenseSet<Register> RewriteRegs;
@@ -215,6 +217,9 @@ class VirtRegRewriter : public MachineFunctionPass {
   void handleIdentityCopy(MachineInstr &MI);
   void expandCopyBundle(MachineInstr &MI) const;
   bool subRegLiveThrough(const MachineInstr &MI, MCRegister SuperPhysReg) const;
+  LaneBitmask liveOutUndefPhiLanesForUndefSubregDef(
+      const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg,
+      MCPhysReg PhysReg, const MachineInstr &MI) const;
 
 public:
   static char ID;
@@ -247,6 +252,7 @@ INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter",
 INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrixWrapperLegacy)
 INITIALIZE_PASS_DEPENDENCY(LiveStacks)
 INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
 INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter",
@@ -262,6 +268,7 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<LiveStacks>();
   AU.addPreserved<LiveStacks>();
   AU.addRequired<VirtRegMapWrapperLegacy>();
+  AU.addRequired<LiveRegMatrixWrapperLegacy>();
 
   if (!ClearVirtRegs)
     AU.addPreserved<LiveDebugVariables>();
@@ -276,6 +283,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
   MRI = &MF->getRegInfo();
   Indexes = &getAnalysis<SlotIndexesWrapperPass>().getSI();
   LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS();
+  LRM = &getAnalysis<LiveRegMatrixWrapperLegacy>().getLRM();
   VRM = &getAnalysis<VirtRegMapWrapperLegacy>().getVRM();
   DebugVars = &getAnalysis<LiveDebugVariables>();
   LLVM_DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
@@ -548,6 +556,40 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
   return false;
 }
 
+/// Return the lanes outside \p SubReg whose subrange of \p LI is not live out
+/// of \p MBB but is live into a successor, minus the lanes LiveRegMatrix flags
+/// for \p PhysReg (assigned to \p LI) around the undef subregister def \p MI.
+LaneBitmask VirtRegRewriter::liveOutUndefPhiLanesForUndefSubregDef(
+    const LiveInterval &LI, const MachineBasicBlock &MBB, unsigned SubReg,
+    MCPhysReg PhysReg, const MachineInstr &MI) const {
+  LaneBitmask UndefMask = ~TRI->getSubRegIndexLaneMask(SubReg);
+  LaneBitmask LiveOutUndefLanes;
+  // UndefMask holds every lane outside SubReg; test each subrange against it.
+  for (const LiveInterval::SubRange &SR : LI.subranges()) {
+    // Take SR's undef lanes if SR is not live out of MBB but live into a succ.
+    LaneBitmask NeedImpDefLanes = UndefMask & SR.LaneMask;
+    if (NeedImpDefLanes.any() && !LIS->isLiveOutOfMBB(SR, &MBB)) {
+      for (const MachineBasicBlock *Succ : MBB.successors()) {
+        if (LIS->isLiveInToMBB(SR, Succ))
+          LiveOutUndefLanes |= NeedImpDefLanes;
+      }
+    }
+  }
+  // Drop lanes LRM reports for PhysReg between MI's base index and reg slot.
+  SlotIndex MIIndex = LIS->getInstructionIndex(MI);
+  SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
+  LaneBitmask InterferingLanes =
+      LRM->checkInterferenceLanes(BeforeMIUses, MIIndex.getRegSlot(), PhysReg);
+  LiveOutUndefLanes &= ~InterferingLanes;
+
+  LLVM_DEBUG(if (LiveOutUndefLanes.any()) {
+    dbgs() << "Need live out undef defs for " << printReg(PhysReg)
+           << LiveOutUndefLanes << " from " << printMBBReference(MBB) << '\n';
+  });
+
+  return LiveOutUndefLanes;
+}
+
 void VirtRegRewriter::rewrite() {
   bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
   SmallVector<Register, 8> SuperDeads;
@@ -602,6 +644,32 @@ void VirtRegRewriter::rewrite() {
                 MO.setIsUndef(true);
             } else if (!MO.isDead()) {
               assert(MO.isDef());
+              if (MO.isUndef()) {
+                const LiveInterval &LI = LIS->getInterval(VirtReg);
+
+                LaneBitmask LiveOutUndefLanes =
+                    liveOutUndefPhiLanesForUndefSubregDef(LI, *MBBI, SubReg,
+                                                          PhysReg, MI);
+                if (LiveOutUndefLanes.any()) {
+                  SmallVector<unsigned, 16> CoveringIndexes;
+
+                  // TODO: Just use one super register def if none of the lanes
+                  // are needed?
+                  if (!TRI->getCoveringSubRegIndexes(
+                          *MRI, MRI->getRegClass(VirtReg), LiveOutUndefLanes,
+                          CoveringIndexes))
+                    llvm_unreachable(
+                        "cannot represent required subregister defs");
+
+                  // Try to represent the minimum needed live out def as a
+                  // sequence of subregister defs.
+                  //
+                  // FIXME: It would be better if we could directly represent
+                  // liveness with a lanemask instead of spamming operands.
+                  for (unsigned SubIdx : CoveringIndexes)
+                    SuperDefs.push_back(TRI->getSubReg(PhysReg, SubIdx));
+                }
+              }
             }
           }
 
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index aff26824dda1049..96cb86ad4c37117 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -288,6 +288,7 @@ Error CFIProgram::parse(DWARFDataExtractor Data, uint64_t *Offset,
     case DW_CFA_remember_state:
     case DW_CFA_restore_state:
     case DW_CFA_GNU_window_save:
+    case DW_CFA_AARCH64_negate_ra_state_with_pc:
       // No operands
       addInstruction(Opcode);
       break;
@@ -666,6 +667,28 @@ Error UnwindTable::parseRows(const CFIProgram &CFIP, UnwindRow &Row,
       }
       break;
 
+    case dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc: {
+      constexpr uint32_t AArch64DWARFPAuthRaState = 34;
+      auto LRLoc = Row.getRegisterLocations().getRegisterLocation(
+          AArch64DWARFPAuthRaState);
+      if (LRLoc) {
+        if (LRLoc->getLocation() == UnwindLocation::Constant) {
+          // Flip both of bits[1:0] of the constant value (XOR with 0x3).
+          LRLoc->setConstant(LRLoc->getConstant() ^ 0x3);
+        } else {
+          return createStringError(
+              errc::invalid_argument,
+              "%s encountered when existing rule for this register is not "
+              "a constant",
+              CFIP.callFrameString(Inst.Opcode).str().c_str());
+        }
+      } else {
+        Row.getRegisterLocations().setRegisterLocation(
+            AArch64DWARFPAuthRaState, UnwindLocation::createIsConstant(0x3));
+      }
+      break;
+    }
+
     case dwarf::DW_CFA_undefined: {
       llvm::Expected<uint64_t> RegNum = Inst.getOperandAsUnsigned(CFIP, 0);
       if (!RegNum)
@@ -847,6 +870,7 @@ CFIProgram::getOperandTypes() {
   DECLARE_OP0(DW_CFA_remember_state);
   DECLARE_OP0(DW_CFA_restore_state);
   DECLARE_OP0(DW_CFA_GNU_window_save);
+  DECLARE_OP0(DW_CFA_AARCH64_negate_ra_state_with_pc);
   DECLARE_OP1(DW_CFA_GNU_args_size, OT_Offset);
   DECLARE_OP0(DW_CFA_nop);
 
diff --git a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index f09975331bba84c..42622ea12152ab7 100644
--- a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -1056,7 +1056,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
     *((double*)Ptr) = Val.DoubleVal;
     break;
   case Type::X86_FP80TyID:
-    memcpy(Ptr, Val.IntVal.getRawData(), 10);
+    memcpy(static_cast<void *>(Ptr), Val.IntVal.getRawData(), 10);
     break;
   case Type::PointerTyID:
     // Ensure 64 bit target pointers are fully initialized on 32 bit hosts.
diff --git a/llvm/lib/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.cpp b/llvm/lib/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.cpp
index 4ef217e6c562db2..81294cad4d7d426 100644
--- a/llvm/lib/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.cpp
@@ -52,17 +52,18 @@ void JITLinkRedirectableSymbolManager::emitRedirectableSymbols(
     return;
   }
 
+  // FIXME: return stubs to the pool here too.
   if (auto Err = R->replace(absoluteSymbols(NewSymbolDefs))) {
     ES.reportError(std::move(Err));
     R->failMaterialization();
     return;
   }
 
-  auto Err = R->withResourceKeyDo([&](ResourceKey Key) {
-    TrackedResources[Key].insert(TrackedResources[Key].end(), Symbols.begin(),
-                                 Symbols.end());
-  });
-  if (Err) {
+  // FIXME: return stubs to the pool here too.
+  if (auto Err = R->withResourceKeyDo([&](ResourceKey Key) {
+        TrackedResources[Key].insert(TrackedResources[Key].end(),
+                                     Symbols.begin(), Symbols.end());
+      })) {
     ES.reportError(std::move(Err));
     R->failMaterialization();
     return;
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/MachOObjectFormat.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/MachOObjectFormat.cpp
index 7f4c2934d026add..11e8eb7bc3a19b1 100644
--- a/llvm/lib/ExecutionEngine/Orc/Shared/MachOObjectFormat.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/MachOObjectFormat.cpp
@@ -42,6 +42,7 @@ StringRef MachOSwift5TypesSectionName = "__TEXT,__swift5_types";
 StringRef MachOSwift5TypeRefSectionName = "__TEXT,__swift5_typeref";
 StringRef MachOSwift5FieldMetadataSectionName = "__TEXT,__swift5_fieldmd";
 StringRef MachOSwift5EntrySectionName = "__TEXT,__swift5_entry";
+StringRef MachOTextTextSectionName = "__TEXT,__text";
 StringRef MachOThreadBSSSectionName = "__DATA,__thread_bss";
 StringRef MachOThreadDataSectionName = "__DATA,__thread_data";
 StringRef MachOThreadVarsSectionName = "__DATA,__thread_vars";
diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp
index 010c0bfd3be76b3..7a0a7afcfcb5c9d 100644
--- a/llvm/lib/Frontend/Offloading/Utility.cpp
+++ b/llvm/lib/Frontend/Offloading/Utility.cpp
@@ -53,7 +53,15 @@ offloading::getOffloadingEntryInitializer(Module &M, Constant *Addr,
   auto *Str =
       new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
                          GlobalValue::InternalLinkage, AddrName, Prefix);
+  StringRef SectionName = ".llvm.rodata.offloading";
   Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+  Str->setSection(SectionName);
+  Str->setAlignment(Align(1));
+
+  // Make a metadata node for these constants so it can be queried from IR.
+  NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.offloading.symbols");
+  Metadata *MDVals[] = {ConstantAsMetadata::get(Str)};
+  MD->addOperand(llvm::MDNode::get(M.getContext(), MDVals));
 
   // Construct the offloading entry.
   Constant *EntryData[] = {
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index be93f9f2e1fdc8b..d2e4dc1c85dfd2d 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -945,7 +945,7 @@ Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
       "omp_global_thread_num");
 }
 
-OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::InsertPointOrErrorTy
 OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive Kind,
                                bool ForceSimpleCall, bool CheckCancelFlag) {
   if (!updateToLocation(Loc))
@@ -992,12 +992,13 @@ OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive Kind,
                          Args);
 
   if (UseCancelBarrier && CheckCancelFlag)
-    emitCancelationCheckImpl(Result, OMPD_parallel);
+    if (Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
+      return Err;
 
   return Builder.saveIP();
 }
 
-OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::InsertPointOrErrorTy
 OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
                               Value *IfCondition,
                               omp::Directive CanceledDirective) {
@@ -1029,18 +1030,22 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
   Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
   Value *Result = Builder.CreateCall(
       getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
-  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
+  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) -> Error {
     if (CanceledDirective == OMPD_parallel) {
       IRBuilder<>::InsertPointGuard IPG(Builder);
       Builder.restoreIP(IP);
-      createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
-                    omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
-                    /* CheckCancelFlag */ false);
+      return createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+                           omp::Directive::OMPD_unknown,
+                           /* ForceSimpleCall */ false,
+                           /* CheckCancelFlag */ false)
+          .takeError();
     }
+    return Error::success();
   };
 
   // The actual cancel logic is shared with others, e.g., cancel_barriers.
-  emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);
+  if (Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
+    return Err;
 
   // Update the insertion point and remove the terminator we introduced.
   Builder.SetInsertPoint(UI->getParent());
@@ -1079,7 +1084,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
   return Builder.saveIP();
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitKernelLaunch(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
     const LocationDescription &Loc, Value *OutlinedFnID,
     EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
     Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP) {
@@ -1134,15 +1139,18 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitKernelLaunch(
 
   auto CurFn = Builder.GetInsertBlock()->getParent();
   emitBlock(OffloadFailedBlock, CurFn);
-  Builder.restoreIP(EmitTargetCallFallbackCB(Builder.saveIP()));
+  InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
+  if (!AfterIP)
+    return AfterIP.takeError();
+  Builder.restoreIP(*AfterIP);
   emitBranch(OffloadContBlock);
   emitBlock(OffloadContBlock, CurFn, /*IsFinished=*/true);
   return Builder.saveIP();
 }
 
-void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
-                                               omp::Directive CanceledDirective,
-                                               FinalizeCallbackTy ExitCB) {
+Error OpenMPIRBuilder::emitCancelationCheckImpl(
+    Value *CancelFlag, omp::Directive CanceledDirective,
+    FinalizeCallbackTy ExitCB) {
   assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
          "Unexpected cancellation!");
 
@@ -1171,12 +1179,15 @@ void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
   // post finalization block that is known to the FiniCB callback.
   Builder.SetInsertPoint(CancellationBlock);
   if (ExitCB)
-    ExitCB(Builder.saveIP());
+    if (Error Err = ExitCB(Builder.saveIP()))
+      return Err;
   auto &FI = FinalizationStack.back();
-  FI.FiniCB(Builder.saveIP());
+  if (Error Err = FI.FiniCB(Builder.saveIP()))
+    return Err;
 
   // The continuation block is where code generation continues.
   Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
+  return Error::success();
 }
 
 // Callback used to create OpenMP runtime calls to support
@@ -1355,7 +1366,7 @@ hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn,
   }
 }
 
-IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
     const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
     BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
     FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
@@ -1496,7 +1507,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
   // Let the caller create the body.
   assert(BodyGenCB && "Expected body generation callback!");
   InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
-  BodyGenCB(InnerAllocaIP, CodeGenIP);
+  if (Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
+    return Err;
 
   LLVM_DEBUG(dbgs() << "After  body codegen: " << *OuterFn << "\n");
 
@@ -1565,10 +1577,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
   FunctionCallee TIDRTLFn =
       getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
 
-  auto PrivHelper = [&](Value &V) {
+  auto PrivHelper = [&](Value &V) -> Error {
     if (&V == TIDAddr || &V == ZeroAddr) {
       OI.ExcludeArgsFromAggregate.push_back(&V);
-      return;
+      return Error::success();
     }
 
     SetVector<Use *> Uses;
@@ -1608,8 +1620,11 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
     if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
       ReplacementValue = PrivTID;
     } else {
-      Builder.restoreIP(
-          PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
+      InsertPointOrErrorTy AfterIP =
+          PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
+      if (!AfterIP)
+        return AfterIP.takeError();
+      Builder.restoreIP(*AfterIP);
       InnerAllocaIP = {
           InnerAllocaIP.getBlock(),
           InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
@@ -1617,11 +1632,13 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
       assert(ReplacementValue &&
              "Expected copy/create callback to set replacement value!");
       if (ReplacementValue == &V)
-        return;
+        return Error::success();
     }
 
     for (Use *UPtr : Uses)
       UPtr->set(ReplacementValue);
+
+    return Error::success();
   };
 
   // Reset the inner alloca insertion as it will be used for loading the values
@@ -1640,7 +1657,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
 
   for (Value *Input : Inputs) {
     LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
-    PrivHelper(*Input);
+    if (Error Err = PrivHelper(*Input))
+      return Err;
   }
   LLVM_DEBUG({
     for (Value *Output : Outputs)
@@ -1666,7 +1684,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
   Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();
 
   InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
-  FiniCB(PreFiniIP);
+  if (Error Err = FiniCB(PreFiniIP))
+    return Err;
 
   // Register the outlined info.
   addOutlineInfo(std::move(OI));
@@ -1797,7 +1816,7 @@ static Value *emitTaskDependencies(
   return DepArray;
 }
 
-OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::InsertPointOrErrorTy
 OpenMPIRBuilder::createTask(const LocationDescription &Loc,
                             InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
                             bool Tied, Value *Final, Value *IfCondition,
@@ -1833,7 +1852,8 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
   InsertPointTy TaskAllocaIP =
       InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin());
   InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin());
-  BodyGenCB(TaskAllocaIP, TaskBodyIP);
+  if (Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
+    return Err;
 
   OutlineInfo OI;
   OI.EntryBB = TaskAllocaBB;
@@ -2048,7 +2068,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
   return Builder.saveIP();
 }
 
-OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::InsertPointOrErrorTy
 OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc,
                                  InsertPointTy AllocaIP,
                                  BodyGenCallbackTy BodyGenCB) {
@@ -2066,7 +2086,8 @@ OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc,
   Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
 
   BasicBlock *TaskgroupExitBB = splitBB(Builder, true, "taskgroup.exit");
-  BodyGenCB(AllocaIP, Builder.saveIP());
+  if (Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
+    return Err;
 
   Builder.SetInsertPoint(TaskgroupExitBB);
   // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup
@@ -2077,7 +2098,7 @@ OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc,
   return Builder.saveIP();
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
     ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB,
     FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
@@ -2124,7 +2145,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
   // ...
   // section_loop.after:
   // <FiniCB>;
-  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
+  auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) -> Error {
     Builder.restoreIP(CodeGenIP);
     BasicBlock *Continue =
         splitBBWithSuffix(Builder, /*CreateBranch=*/false, ".sections.after");
@@ -2138,12 +2159,14 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
       SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
       Builder.SetInsertPoint(CaseBB);
       BranchInst *CaseEndBr = Builder.CreateBr(Continue);
-      SectionCB(InsertPointTy(),
-                {CaseEndBr->getParent(), CaseEndBr->getIterator()});
+      if (Error Err = SectionCB(InsertPointTy(), {CaseEndBr->getParent(),
+                                                  CaseEndBr->getIterator()}))
+        return Err;
       CaseNumber++;
     }
     // remove the existing terminator from body BB since there can be no
     // terminators after switch/case
+    return Error::success();
   };
   // Loop body ends here
   // LowerBound, UpperBound, and STride for createCanonicalLoop
@@ -2151,10 +2174,16 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
   Value *LB = ConstantInt::get(I32Ty, 0);
   Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
   Value *ST = ConstantInt::get(I32Ty, 1);
-  llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
+  Expected<CanonicalLoopInfo *> LoopInfo = createCanonicalLoop(
       Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
-  InsertPointTy AfterIP =
-      applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait);
+  if (!LoopInfo)
+    return LoopInfo.takeError();
+
+  InsertPointOrErrorTy WsloopIP =
+      applyStaticWorkshareLoop(Loc.DL, *LoopInfo, AllocaIP, !IsNowait);
+  if (!WsloopIP)
+    return WsloopIP.takeError();
+  InsertPointTy AfterIP = *WsloopIP;
 
   // Apply the finalization callback in LoopAfterBB
   auto FiniInfo = FinalizationStack.pop_back_val();
@@ -2164,14 +2193,15 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
     Builder.restoreIP(AfterIP);
     BasicBlock *FiniBB =
         splitBBWithSuffix(Builder, /*CreateBranch=*/true, "sections.fini");
-    CB(Builder.saveIP());
+    if (Error Err = CB(Builder.saveIP()))
+      return Err;
     AfterIP = {FiniBB, FiniBB->begin()};
   }
 
   return AfterIP;
 }
 
-OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::InsertPointOrErrorTy
 OpenMPIRBuilder::createSection(const LocationDescription &Loc,
                                BodyGenCallbackTy BodyGenCB,
                                FinalizeCallbackTy FiniCB) {
@@ -2502,7 +2532,7 @@ void OpenMPIRBuilder::emitReductionListCopy(
   }
 }
 
-Function *OpenMPIRBuilder::emitInterWarpCopyFunction(
+Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
     const LocationDescription &Loc, ArrayRef<ReductionInfo> ReductionInfos,
     AttributeList FuncAttrs) {
   InsertPointTy SavedIP = Builder.saveIP();
@@ -2621,10 +2651,13 @@ Function *OpenMPIRBuilder::emitInterWarpCopyFunction(
       }
 
       // kmpc_barrier.
-      createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
-                    omp::Directive::OMPD_unknown,
-                    /* ForceSimpleCall */ false,
-                    /* CheckCancelFlag */ true);
+      InsertPointOrErrorTy BarrierIP1 =
+          createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+                        omp::Directive::OMPD_unknown,
+                        /* ForceSimpleCall */ false,
+                        /* CheckCancelFlag */ true);
+      if (!BarrierIP1)
+        return BarrierIP1.takeError();
       BasicBlock *ThenBB = BasicBlock::Create(Ctx, "then");
       BasicBlock *ElseBB = BasicBlock::Create(Ctx, "else");
       BasicBlock *MergeBB = BasicBlock::Create(Ctx, "ifcont");
@@ -2666,10 +2699,13 @@ Function *OpenMPIRBuilder::emitInterWarpCopyFunction(
 
       // endif
       emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
-      createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
-                    omp::Directive::OMPD_unknown,
-                    /* ForceSimpleCall */ false,
-                    /* CheckCancelFlag */ true);
+      InsertPointOrErrorTy BarrierIP2 =
+          createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+                        omp::Directive::OMPD_unknown,
+                        /* ForceSimpleCall */ false,
+                        /* CheckCancelFlag */ true);
+      if (!BarrierIP2)
+        return BarrierIP2.takeError();
 
       // Warp 0 copies reduce element from transfer medium
       BasicBlock *W0ThenBB = BasicBlock::Create(Ctx, "then");
@@ -3286,7 +3322,7 @@ std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
   return (Name + Suffix).str();
 }
 
-Function *OpenMPIRBuilder::createReductionFunction(
+Expected<Function *> OpenMPIRBuilder::createReductionFunction(
     StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
     ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
   auto *FuncTy = FunctionType::get(Builder.getVoidTy(),
@@ -3352,7 +3388,10 @@ Function *OpenMPIRBuilder::createReductionFunction(
       Value *LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
       Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
       Value *Reduced;
-      RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced);
+      InsertPointOrErrorTy AfterIP =
+          RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced);
+      if (!AfterIP)
+        return AfterIP.takeError();
       if (!Builder.GetInsertBlock())
         return ReductionFunc;
       Builder.CreateStore(Reduced, LHSPtr);
@@ -3405,7 +3444,7 @@ checkReductionInfos(ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
   }
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductionsGPU(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
     InsertPointTy CodeGenIP, ArrayRef<ReductionInfo> ReductionInfos,
     bool IsNoWait, bool IsTeamsReduction, bool HasDistribute,
@@ -3435,11 +3474,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductionsGPU(
   AttrBldr.removeAttribute(Attribute::OptimizeNone);
   FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
 
-  Function *ReductionFunc = nullptr;
   CodeGenIP = Builder.saveIP();
-  ReductionFunc =
+  Expected<Function *> ReductionResult =
       createReductionFunction(Builder.GetInsertBlock()->getParent()->getName(),
                               ReductionInfos, ReductionGenCBKind, FuncAttrs);
+  if (!ReductionResult)
+    return ReductionResult.takeError();
+  Function *ReductionFunc = *ReductionResult;
   Builder.restoreIP(CodeGenIP);
 
   // Set the grid value in the config needed for lowering later on
@@ -3480,7 +3521,11 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductionsGPU(
   CodeGenIP = Builder.saveIP();
   Function *SarFunc =
       emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
-  Function *WcFunc = emitInterWarpCopyFunction(Loc, ReductionInfos, FuncAttrs);
+  Expected<Function *> CopyResult =
+      emitInterWarpCopyFunction(Loc, ReductionInfos, FuncAttrs);
+  if (!CopyResult)
+    return CopyResult.takeError();
+  Function *WcFunc = *CopyResult;
   Builder.restoreIP(CodeGenIP);
 
   Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
@@ -3595,7 +3640,7 @@ static Function *getFreshReductionFunc(Module &M) {
                           ".omp.reduction.func", &M);
 }
 
-OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::InsertPointOrErrorTy
 OpenMPIRBuilder::createReductions(const LocationDescription &Loc,
                                   InsertPointTy AllocaIP,
                                   ArrayRef<ReductionInfo> ReductionInfos,
@@ -3688,7 +3733,7 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc,
     Type *ValueType = RI.ElementType;
     // We have one less load for by-ref case because that load is now inside of
     // the reduction region
-    Value *RedValue = nullptr;
+    Value *RedValue = RI.Variable;
     if (!IsByRef[En.index()]) {
       RedValue = Builder.CreateLoad(ValueType, RI.Variable,
                                     "red.value." + Twine(En.index()));
@@ -3697,13 +3742,12 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc,
         Builder.CreateLoad(ValueType, RI.PrivateVariable,
                            "red.private.value." + Twine(En.index()));
     Value *Reduced;
-    if (IsByRef[En.index()]) {
-      Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), RI.Variable,
-                                        PrivateRedValue, Reduced));
-    } else {
-      Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), RedValue,
-                                        PrivateRedValue, Reduced));
-    }
+    InsertPointOrErrorTy AfterIP =
+        RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
+    if (!AfterIP)
+      return AfterIP.takeError();
+    Builder.restoreIP(*AfterIP);
+
     if (!Builder.GetInsertBlock())
       return InsertPointTy();
     // for by-ref case, the load is inside of the reduction region
@@ -3722,8 +3766,11 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc,
   Builder.SetInsertPoint(AtomicRedBlock);
   if (CanGenerateAtomic && llvm::none_of(IsByRef, [](bool P) { return P; })) {
     for (const ReductionInfo &RI : ReductionInfos) {
-      Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.ElementType,
-                                              RI.Variable, RI.PrivateVariable));
+      InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
+          Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
+      if (!AfterIP)
+        return AfterIP.takeError();
+      Builder.restoreIP(*AfterIP);
       if (!Builder.GetInsertBlock())
         return InsertPointTy();
     }
@@ -3755,7 +3802,11 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc,
         Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
     Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
     Value *Reduced;
-    Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
+    InsertPointOrErrorTy AfterIP =
+        RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced);
+    if (!AfterIP)
+      return AfterIP.takeError();
+    Builder.restoreIP(*AfterIP);
     if (!Builder.GetInsertBlock())
       return InsertPointTy();
     // store is inside of the reduction region when using by-ref
@@ -3768,11 +3819,10 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc,
   return Builder.saveIP();
 }
 
-OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::InsertPointOrErrorTy
 OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
                               BodyGenCallbackTy BodyGenCB,
                               FinalizeCallbackTy FiniCB) {
-
   if (!updateToLocation(Loc))
     return Loc.IP;
 
@@ -3793,7 +3843,7 @@ OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
                               /*Conditional*/ true, /*hasFinalize*/ true);
 }
 
-OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::InsertPointOrErrorTy
 OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
                               BodyGenCallbackTy BodyGenCB,
                               FinalizeCallbackTy FiniCB, Value *Filter) {
@@ -3884,7 +3934,7 @@ CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
   return CL;
 }
 
-CanonicalLoopInfo *
+Expected<CanonicalLoopInfo *>
 OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
                                      LoopBodyGenCallbackTy BodyGenCB,
                                      Value *TripCount, const Twine &Name) {
@@ -3906,7 +3956,8 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
 
   // Emit the body content. We do it after connecting the loop to the CFG to
   // avoid that the callback encounters degenerate BBs.
-  BodyGenCB(CL->getBodyIP(), CL->getIndVar());
+  if (Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
+    return Err;
 
 #ifndef NDEBUG
   CL->assertOK();
@@ -3914,7 +3965,7 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
   return CL;
 }
 
-CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
+Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
     const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
     Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
     InsertPointTy ComputeIP, const Twine &Name) {
@@ -3979,7 +4030,7 @@ CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
     Builder.restoreIP(CodeGenIP);
     Value *Span = Builder.CreateMul(IV, Step);
     Value *IndVar = Builder.CreateAdd(Span, Start);
-    BodyGenCB(Builder.saveIP(), IndVar);
+    return BodyGenCB(Builder.saveIP(), IndVar);
   };
   LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
   return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
@@ -4001,7 +4052,7 @@ static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
   llvm_unreachable("unknown OpenMP loop iterator bitwidth");
 }
 
-OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::InsertPointOrErrorTy
 OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                           InsertPointTy AllocaIP,
                                           bool NeedsBarrier) {
@@ -4078,10 +4129,14 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
   Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
 
   // Add the barrier if requested.
-  if (NeedsBarrier)
-    createBarrier(LocationDescription(Builder.saveIP(), DL),
-                  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
-                  /* CheckCancelFlag */ false);
+  if (NeedsBarrier) {
+    InsertPointOrErrorTy BarrierIP =
+        createBarrier(LocationDescription(Builder.saveIP(), DL),
+                      omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
+                      /* CheckCancelFlag */ false);
+    if (!BarrierIP)
+      return BarrierIP.takeError();
+  }
 
   InsertPointTy AfterIP = CLI->getAfterIP();
   CLI->invalidate();
@@ -4089,9 +4144,12 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
   return AfterIP;
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
-    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
-    bool NeedsBarrier, Value *ChunkSize) {
+OpenMPIRBuilder::InsertPointOrErrorTy
+OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(DebugLoc DL,
+                                                 CanonicalLoopInfo *CLI,
+                                                 InsertPointTy AllocaIP,
+                                                 bool NeedsBarrier,
+                                                 Value *ChunkSize) {
   assert(CLI->isValid() && "Requires a valid canonical loop");
   assert(ChunkSize && "Chunk size is required");
 
@@ -4167,12 +4225,23 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
   // Create outer "dispatch" loop for enumerating the chunks.
   BasicBlock *DispatchEnter = splitBB(Builder, true);
   Value *DispatchCounter;
-  CanonicalLoopInfo *DispatchCLI = createCanonicalLoop(
+  Expected<CanonicalLoopInfo *> LoopResult = createCanonicalLoop(
       {Builder.saveIP(), DL},
-      [&](InsertPointTy BodyIP, Value *Counter) { DispatchCounter = Counter; },
+      [&](InsertPointTy BodyIP, Value *Counter) {
+        DispatchCounter = Counter;
+        return Error::success();
+      },
       FirstChunkStart, CastedTripCount, NextChunkStride,
       /*IsSigned=*/false, /*InclusiveStop=*/false, /*ComputeIP=*/{},
       "dispatch");
+  if (!LoopResult) {
+    // It is safe to assume this didn't return an error because the callback
+    // passed into createCanonicalLoop is the only possible error source, and it
+    // always returns success. The result must still be converted to bool
+    // because an Expected that is destroyed unchecked aborts at runtime.
+    llvm_unreachable("unexpected error creating canonical loop");
+  }
+  CanonicalLoopInfo *DispatchCLI = *LoopResult;
 
   // Remember the BasicBlocks of the dispatch loop we need, then invalidate to
   // not have to preserve the canonical invariant.
@@ -4219,9 +4288,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
   Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
 
   // Add the barrier if requested.
-  if (NeedsBarrier)
-    createBarrier(LocationDescription(Builder.saveIP(), DL), OMPD_for,
-                  /*ForceSimpleCall=*/false, /*CheckCancelFlag=*/false);
+  if (NeedsBarrier) {
+    InsertPointOrErrorTy AfterIP =
+        createBarrier(LocationDescription(Builder.saveIP(), DL), OMPD_for,
+                      /*ForceSimpleCall=*/false, /*CheckCancelFlag=*/false);
+    if (!AfterIP)
+      return AfterIP.takeError();
+  }
 
 #ifndef NDEBUG
   // Even though we currently do not support applying additional methods to it,
@@ -4229,7 +4302,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
   CLI->assertOK();
 #endif
 
-  return {DispatchAfter, DispatchAfter->getFirstInsertionPt()};
+  return InsertPointTy(DispatchAfter, DispatchAfter->getFirstInsertionPt());
 }
 
 // Returns an LLVM function to call for executing an OpenMP static worksharing
@@ -4462,7 +4535,7 @@ OpenMPIRBuilder::applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
   return CLI->getAfterIP();
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoop(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
     DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
     bool NeedsBarrier, omp::ScheduleKind SchedKind, Value *ChunkSize,
     bool HasSimdModifier, bool HasMonotonicModifier,
@@ -4563,9 +4636,11 @@ getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
   llvm_unreachable("unknown OpenMP loop iterator bitwidth");
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
-    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
-    OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
+OpenMPIRBuilder::InsertPointOrErrorTy
+OpenMPIRBuilder::applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
+                                           InsertPointTy AllocaIP,
+                                           OMPScheduleType SchedType,
+                                           bool NeedsBarrier, Value *Chunk) {
   assert(CLI->isValid() && "Requires a valid canonical loop");
   assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
          "Require dedicated allocate IP");
@@ -4681,9 +4756,12 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
   // Add the barrier if requested.
   if (NeedsBarrier) {
     Builder.SetInsertPoint(&Exit->back());
-    createBarrier(LocationDescription(Builder.saveIP(), DL),
-                  omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
-                  /* CheckCancelFlag */ false);
+    InsertPointOrErrorTy BarrierIP =
+        createBarrier(LocationDescription(Builder.saveIP(), DL),
+                      omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
+                      /* CheckCancelFlag */ false);
+    if (!BarrierIP)
+      return BarrierIP.takeError();
   }
 
   CLI->invalidate();
@@ -5542,7 +5620,7 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
   return Builder.saveIP();
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
     const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
     FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef<llvm::Value *> CPVars,
     ArrayRef<llvm::Function *> CPFuncs) {
@@ -5571,14 +5649,17 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle(
   Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
   Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
 
-  auto FiniCBWrapper = [&](InsertPointTy IP) {
-    FiniCB(IP);
+  auto FiniCBWrapper = [&](InsertPointTy IP) -> Error {
+    if (Error Err = FiniCB(IP))
+      return Err;
 
     // The thread that executes the single region must set `DidIt` to 1.
     // This is used by __kmpc_copyprivate, to know if the caller is the
     // single thread or not.
     if (DidIt)
       Builder.CreateStore(Builder.getInt32(1), DidIt);
+
+    return Error::success();
   };
 
   // generates the following:
@@ -5589,9 +5670,12 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle(
   // __kmpc_copyprivate
   // __kmpc_barrier
 
-  EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
-                       /*Conditional*/ true,
-                       /*hasFinalize*/ true);
+  InsertPointOrErrorTy AfterIP =
+      EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
+                           /*Conditional*/ true,
+                           /*hasFinalize*/ true);
+  if (!AfterIP)
+    return AfterIP.takeError();
 
   if (DidIt) {
     for (size_t I = 0, E = CPVars.size(); I < E; ++I)
@@ -5600,14 +5684,18 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle(
                         /*BufSize=*/ConstantInt::get(Int64, 0), CPVars[I],
                         CPFuncs[I], DidIt);
     // NOTE __kmpc_copyprivate already inserts a barrier
-  } else if (!IsNowait)
-    createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
-                  omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
-                  /* CheckCancelFlag */ false);
+  } else if (!IsNowait) {
+    InsertPointOrErrorTy AfterIP =
+        createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+                      omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
+                      /* CheckCancelFlag */ false);
+    if (!AfterIP)
+      return AfterIP.takeError();
+  }
   return Builder.saveIP();
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
     const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
     FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
 
@@ -5688,7 +5776,7 @@ OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
   return Builder.saveIP();
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
     const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
     FinalizeCallbackTy FiniCB, bool IsThreads) {
   if (!updateToLocation(Loc))
@@ -5717,7 +5805,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
                               /*Conditional*/ false, /*hasFinalize*/ true);
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
     Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
     BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
     bool HasFinalize, bool IsCancellable) {
@@ -5739,15 +5827,19 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
   emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
 
   // generate body
-  BodyGenCB(/* AllocaIP */ InsertPointTy(),
-            /* CodeGenIP */ Builder.saveIP());
+  if (Error Err = BodyGenCB(/* AllocaIP */ InsertPointTy(),
+                            /* CodeGenIP */ Builder.saveIP()))
+    return Err;
 
   // emit exit call and do any needed finalization.
   auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
   assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
          FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
          "Unexpected control flow graph state!!");
-  emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
+  InsertPointOrErrorTy AfterIP =
+      emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
+  if (!AfterIP)
+    return AfterIP.takeError();
   assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
          "Unexpected Control Flow State!");
   MergeBlockIntoPredecessor(FiniBB);
@@ -5796,7 +5888,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
   return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
     omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
     bool HasFinalize) {
 
@@ -5810,7 +5902,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
     FinalizationInfo Fi = FinalizationStack.pop_back_val();
     assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
 
-    Fi.FiniCB(FinIP);
+    if (Error Err = Fi.FiniCB(FinIP))
+      return Err;
 
     BasicBlock *FiniBB = FinIP.getBlock();
     Instruction *FiniBBTI = FiniBB->getTerminator();
@@ -6319,7 +6412,7 @@ Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
       Constant::getNullValue(Builder.getInt8Ty()), EntryFnName);
 }
 
-void OpenMPIRBuilder::emitTargetRegionFunction(
+Error OpenMPIRBuilder::emitTargetRegionFunction(
     TargetRegionEntryInfo &EntryInfo,
     FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry,
     Function *&OutlinedFn, Constant *&OutlinedFnID) {
@@ -6327,15 +6420,20 @@ void OpenMPIRBuilder::emitTargetRegionFunction(
   SmallString<64> EntryFnName;
   OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
 
-  OutlinedFn = Config.isTargetDevice() || !Config.openMPOffloadMandatory()
-                   ? GenerateFunctionCallback(EntryFnName)
-                   : nullptr;
+  if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
+    Expected<Function *> CBResult = GenerateFunctionCallback(EntryFnName);
+    if (!CBResult)
+      return CBResult.takeError();
+    OutlinedFn = *CBResult;
+  } else {
+    OutlinedFn = nullptr;
+  }
 
   // If this target outline function is not an offload entry, we don't need to
   // register it. This may be in the case of a false if clause, or if there are
   // no OpenMP targets.
   if (!IsOffloadEntry)
-    return;
+    return Error::success();
 
   std::string EntryFnIDName =
       Config.isTargetDevice()
@@ -6344,6 +6442,7 @@ void OpenMPIRBuilder::emitTargetRegionFunction(
 
   OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
                                               EntryFnName, EntryFnIDName);
+  return Error::success();
 }
 
 Constant *OpenMPIRBuilder::registerTargetRegionFunction(
@@ -6359,12 +6458,13 @@ Constant *OpenMPIRBuilder::registerTargetRegionFunction(
   return OutlinedFnID;
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
     InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
     TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
     omp::RuntimeFunction *MapperFunc,
-    function_ref<InsertPointTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)>
+    function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
+                                      BodyGenTy BodyGenType)>
         BodyGenCB,
     function_ref<void(unsigned int, Value *)> DeviceAddrCB,
     function_ref<Value *(unsigned int)> CustomMapperCB, Value *SrcLocInfo) {
@@ -6374,8 +6474,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
   Builder.restoreIP(CodeGenIP);
   // Disable TargetData CodeGen on Device pass.
   if (Config.IsTargetDevice.value_or(false)) {
-    if (BodyGenCB)
-      Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv));
+    if (BodyGenCB) {
+      InsertPointOrErrorTy AfterIP =
+          BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
+      if (!AfterIP)
+        return AfterIP.takeError();
+      Builder.restoreIP(*AfterIP);
+    }
     return Builder.saveIP();
   }
 
@@ -6384,7 +6489,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
   // Generate the code for the opening of the data environment. Capture all the
   // arguments of the runtime call by reference because they are used in the
   // closing of the region.
-  auto BeginThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+  auto BeginThenGen = [&](InsertPointTy AllocaIP,
+                          InsertPointTy CodeGenIP) -> Error {
     MapInfo = &GenMapInfoCB(Builder.saveIP());
     emitOffloadingArrays(AllocaIP, Builder.saveIP(), *MapInfo, Info,
                          /*IsNonContiguous=*/true, DeviceAddrCB,
@@ -6413,7 +6519,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
     if (IsStandAlone) {
       assert(MapperFunc && "MapperFunc missing for standalone target data");
 
-      auto TaskBodyCB = [&](Value *, Value *, IRBuilderBase::InsertPoint) {
+      auto TaskBodyCB = [&](Value *, Value *,
+                            IRBuilderBase::InsertPoint) -> Error {
         if (Info.HasNoWait) {
           OffloadingArgs.append({llvm::Constant::getNullValue(Int32),
                                  llvm::Constant::getNullValue(VoidPtr),
@@ -6431,16 +6538,20 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
           emitBlock(OffloadContBlock, CurFn, /*IsFinished=*/true);
           Builder.restoreIP(Builder.saveIP());
         }
+        return Error::success();
       };
 
       bool RequiresOuterTargetTask = Info.HasNoWait;
-
-      if (!RequiresOuterTargetTask)
-        TaskBodyCB(/*DeviceID=*/nullptr, /*RTLoc=*/nullptr,
-                   /*TargetTaskAllocaIP=*/{});
-      else
-        emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
-                       /*Dependencies=*/{}, Info.HasNoWait);
+      if (!RequiresOuterTargetTask) {
+        Error Err = TaskBodyCB(/*DeviceID=*/nullptr, /*RTLoc=*/nullptr,
+                               /*TargetTaskAllocaIP=*/{});
+        assert(!Err && "TaskBodyCB expected to succeed");
+      } else {
+        InsertPointOrErrorTy AfterIP =
+            emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
+                           /*Dependencies=*/{}, Info.HasNoWait);
+        assert(AfterIP && "TaskBodyCB expected to succeed");
+      }
     } else {
       Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
           omp::OMPRTL___tgt_target_data_begin_mapper);
@@ -6458,15 +6569,26 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
       // If device pointer privatization is required, emit the body of the
       // region here. It will have to be duplicated: with and without
       // privatization.
-      Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::Priv));
+      InsertPointOrErrorTy AfterIP =
+          BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
+      if (!AfterIP)
+        return AfterIP.takeError();
+      Builder.restoreIP(*AfterIP);
     }
+    return Error::success();
   };
 
   // If we need device pointer privatization, we need to emit the body of the
   // region with no privatization in the 'else' branch of the conditional.
   // Otherwise, we don't have to do anything.
-  auto BeginElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
-    Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv));
+  auto BeginElseGen = [&](InsertPointTy AllocaIP,
+                          InsertPointTy CodeGenIP) -> Error {
+    InsertPointOrErrorTy AfterIP =
+        BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
+    if (!AfterIP)
+      return AfterIP.takeError();
+    Builder.restoreIP(*AfterIP);
+    return Error::success();
   };
 
   // Generate code for the closing of the data region.
@@ -6494,35 +6616,45 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
         getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
 
     Builder.CreateCall(EndMapperFunc, OffloadingArgs);
+    return Error::success();
   };
 
   // We don't have to do anything to close the region if the if clause evaluates
   // to false.
-  auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
+  auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+    return Error::success();
+  };
 
-  if (BodyGenCB) {
-    if (IfCond) {
-      emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
-    } else {
-      BeginThenGen(AllocaIP, Builder.saveIP());
+  Error Err = [&]() -> Error {
+    if (BodyGenCB) {
+      Error Err = [&]() {
+        if (IfCond)
+          return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
+        return BeginThenGen(AllocaIP, Builder.saveIP());
+      }();
+
+      if (Err)
+        return Err;
+
+      // If we don't require privatization of device pointers, we emit the body
+      // in between the runtime calls. This avoids duplicating the body code.
+      InsertPointOrErrorTy AfterIP =
+          BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
+      if (!AfterIP)
+        return AfterIP.takeError();
+      Builder.restoreIP(*AfterIP);
+
+      if (IfCond)
+        return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
+      return EndThenGen(AllocaIP, Builder.saveIP());
     }
+    if (IfCond)
+      return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
+    return BeginThenGen(AllocaIP, Builder.saveIP());
+  }();
 
-    // If we don't require privatization of device pointers, we emit the body in
-    // between the runtime calls. This avoids duplicating the body code.
-    Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv));
-
-    if (IfCond) {
-      emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
-    } else {
-      EndThenGen(AllocaIP, Builder.saveIP());
-    }
-  } else {
-    if (IfCond) {
-      emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
-    } else {
-      BeginThenGen(AllocaIP, Builder.saveIP());
-    }
-  }
+  if (Err)
+    return Err;
 
   return Builder.saveIP();
 }
@@ -6591,7 +6723,7 @@ FunctionCallee OpenMPIRBuilder::createDispatchDeinitFunction() {
   return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
 }
 
-static Function *createOutlinedFunction(
+static Expected<Function *> createOutlinedFunction(
     OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName,
     SmallVectorImpl<Value *> &Inputs,
     OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
@@ -6671,7 +6803,11 @@ static Function *createOutlinedFunction(
     OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
 
   // Insert target deinit call in the device compilation pass.
-  Builder.restoreIP(CBFunc(Builder.saveIP(), Builder.saveIP()));
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      CBFunc(Builder.saveIP(), Builder.saveIP());
+  if (!AfterIP)
+    return AfterIP.takeError();
+  Builder.restoreIP(*AfterIP);
   if (OMPBuilder.Config.isTargetDevice())
     OMPBuilder.createTargetDeinit(Builder);
 
@@ -6726,8 +6862,11 @@ static Function *createOutlinedFunction(
     Argument &Arg = std::get<1>(InArg);
     Value *InputCopy = nullptr;
 
-    Builder.restoreIP(
-        ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.saveIP()));
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+        ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.saveIP());
+    if (!AfterIP)
+      return AfterIP.takeError();
+    Builder.restoreIP(*AfterIP);
 
     // In certain cases a Global may be set up for replacement, however, this
     // Global may be used in multiple arguments to the kernel, just segmented
@@ -6847,7 +6986,8 @@ static Function *emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder,
   Builder.CreateRetVoid();
   return ProxyFn;
 }
-static void emitTargetOutlinedFunction(
+
+static Error emitTargetOutlinedFunction(
     OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry,
     TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn,
     Constant *&OutlinedFnID, SmallVectorImpl<Value *> &Inputs,
@@ -6861,11 +7001,12 @@ static void emitTargetOutlinedFunction(
                                       CBFunc, ArgAccessorFuncCB);
       };
 
-  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
-                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);
+  return OMPBuilder.emitTargetRegionFunction(
+      EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
+      OutlinedFnID);
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
     TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc,
     OpenMPIRBuilder::InsertPointTy AllocaIP,
     const SmallVector<llvm::OpenMPIRBuilder::DependData> &Dependencies,
@@ -6983,7 +7124,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
 
   Builder.restoreIP(TargetTaskBodyIP);
 
-  TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP);
+  if (Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
+    return Err;
 
   OI.ExitBB = Builder.saveIP().getBlock();
   OI.PostOutlineCB = [this, ToBeDeleted, Dependencies, HasNoWait,
@@ -7161,8 +7303,8 @@ emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
   // Generate a function call to the host fallback implementation of the target
   // region. This is called by the host when no offload entry was generated for
   // the target region and when the offloading call fails at runtime.
-  auto &&EmitTargetCallFallbackCB =
-      [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy {
+  auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
+      -> OpenMPIRBuilder::InsertPointOrErrorTy {
     Builder.restoreIP(IP);
     Builder.CreateCall(OutlinedFn, Args);
     return Builder.saveIP();
@@ -7173,9 +7315,10 @@ emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
 
   OpenMPIRBuilder::TargetKernelArgs KArgs;
 
-  auto TaskBodyCB = [&](Value *DeviceID, Value *RTLoc,
-                        IRBuilderBase::InsertPoint TargetTaskAllocaIP) {
-    if (OutlinedFnID) {
+  auto TaskBodyCB =
+      [&](Value *DeviceID, Value *RTLoc,
+          IRBuilderBase::InsertPoint TargetTaskAllocaIP) -> Error {
+    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = [&]() {
       // emitKernelLaunch makes the necessary runtime call to offload the
       // kernel. We then outline all that code into a separate function
       // ('kernel_launch_function' in the pseudo code above). This function is
@@ -7183,31 +7326,41 @@ emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
       // '@.omp_target_task_proxy_func' in the pseudo code above)
       // "@.omp_target_task_proxy_func' is generated by
       // emitTargetTaskProxyFunction.
-      Builder.restoreIP(OMPBuilder.emitKernelLaunch(
-          Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, DeviceID,
-          RTLoc, TargetTaskAllocaIP));
-    } else {
-      // When OutlinedFnID is set to nullptr, then it's not an offloading
-      // call. In this case, we execute the host implementation directly.
-      OMPBuilder.Builder.restoreIP(
-          EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP()));
-    }
+      if (OutlinedFnID)
+        return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
+                                           EmitTargetCallFallbackCB, KArgs,
+                                           DeviceID, RTLoc, TargetTaskAllocaIP);
+      // When OutlinedFnID is set to nullptr, then it's not an offloading call.
+      // In this case, we execute the host implementation directly.
+      return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
+    }();
+
+    if (!AfterIP)
+      return AfterIP.takeError();
+
+    OMPBuilder.Builder.restoreIP(*AfterIP);
+    return Error::success();
   };
 
   // If we don't have an ID for the target region, it means an offload entry
   // wasn't created. In this case we just run the host fallback directly.
   if (!OutlinedFnID) {
-    if (RequiresOuterTargetTask) {
-      // Arguments that are intended to be directly forwarded to an
-      // emitKernelLaunch call are pased as nullptr, since OutlinedFnID=nullptr
-      // results in that call not being done.
-      Builder.restoreIP(OMPBuilder.emitTargetTask(TaskBodyCB,
-                                                  /*DeviceID=*/nullptr,
-                                                  /*RTLoc=*/nullptr, AllocaIP,
-                                                  Dependencies, HasNoWait));
-    } else {
-      Builder.restoreIP(EmitTargetCallFallbackCB(Builder.saveIP()));
-    }
+    OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = [&]() {
+      if (RequiresOuterTargetTask) {
+        // Arguments that are intended to be directly forwarded to an
+        // emitKernelLaunch call are passed as nullptr, since
+        // OutlinedFnID=nullptr results in that call not being done.
+        return OMPBuilder.emitTargetTask(TaskBodyCB, /*DeviceID=*/nullptr,
+                                         /*RTLoc=*/nullptr, AllocaIP,
+                                         Dependencies, HasNoWait);
+      }
+      return EmitTargetCallFallbackCB(Builder.saveIP());
+    }();
+
+    // Assume no error was returned because EmitTargetCallFallbackCB doesn't
+    // produce any. The 'if' check enables accessing the returned value.
+    if (AfterIP)
+      Builder.restoreIP(*AfterIP);
     return;
   }
 
@@ -7247,17 +7400,24 @@ emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
 
   // The presence of certain clauses on the target directive require the
   // explicit generation of the target task.
-  if (RequiresOuterTargetTask) {
-    Builder.restoreIP(OMPBuilder.emitTargetTask(
-        TaskBodyCB, DeviceID, RTLoc, AllocaIP, Dependencies, HasNoWait));
-  } else {
-    Builder.restoreIP(OMPBuilder.emitKernelLaunch(
-        Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, DeviceID, RTLoc,
-        AllocaIP));
-  }
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = [&]() {
+    if (RequiresOuterTargetTask)
+      return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
+                                       Dependencies, HasNoWait);
+
+    return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
+                                       EmitTargetCallFallbackCB, KArgs,
+                                       DeviceID, RTLoc, AllocaIP);
+  }();
+
+  // Assume no error was returned because TaskBodyCB and
+  // EmitTargetCallFallbackCB don't produce any. The 'if' check enables
+  // accessing the returned value.
+  if (AfterIP)
+    Builder.restoreIP(*AfterIP);
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
     const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP,
     InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo,
     ArrayRef<int32_t> NumTeams, ArrayRef<int32_t> NumThreads,
@@ -7276,9 +7436,10 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget(
   // The target region is outlined into its own function. The LLVM IR for
   // the target region itself is generated using the callbacks CBFunc
   // and ArgAccessorFuncCB
-  emitTargetOutlinedFunction(*this, Builder, IsOffloadEntry, EntryInfo,
-                             OutlinedFn, OutlinedFnID, Args, CBFunc,
-                             ArgAccessorFuncCB);
+  if (Error Err = emitTargetOutlinedFunction(
+          *this, Builder, IsOffloadEntry, EntryInfo, OutlinedFn, OutlinedFnID,
+          Args, CBFunc, ArgAccessorFuncCB))
+    return Err;
 
   // If we are not on the target device, then we need to generate code
   // to make a remote call (offload) to the previously outlined function
@@ -7767,18 +7928,17 @@ void OpenMPIRBuilder::emitBlock(BasicBlock *BB, Function *CurFn,
   Builder.SetInsertPoint(BB);
 }
 
-void OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
-                                   BodyGenCallbackTy ElseGen,
-                                   InsertPointTy AllocaIP) {
+Error OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
+                                    BodyGenCallbackTy ElseGen,
+                                    InsertPointTy AllocaIP) {
   // If the condition constant folds and can be elided, try to avoid emitting
   // the condition and the dead arm of the if/else.
   if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
     auto CondConstant = CI->getSExtValue();
     if (CondConstant)
-      ThenGen(AllocaIP, Builder.saveIP());
-    else
-      ElseGen(AllocaIP, Builder.saveIP());
-    return;
+      return ThenGen(AllocaIP, Builder.saveIP());
+
+    return ElseGen(AllocaIP, Builder.saveIP());
   }
 
   Function *CurFn = Builder.GetInsertBlock()->getParent();
@@ -7791,16 +7951,19 @@ void OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
   Builder.CreateCondBr(Cond, ThenBlock, ElseBlock);
   // Emit the 'then' code.
   emitBlock(ThenBlock, CurFn);
-  ThenGen(AllocaIP, Builder.saveIP());
+  if (Error Err = ThenGen(AllocaIP, Builder.saveIP()))
+    return Err;
   emitBranch(ContBlock);
   // Emit the 'else' code if present.
   // There is no need to emit line number for unconditional branch.
   emitBlock(ElseBlock, CurFn);
-  ElseGen(AllocaIP, Builder.saveIP());
+  if (Error Err = ElseGen(AllocaIP, Builder.saveIP()))
+    return Err;
   // There is no need to emit line number for unconditional branch.
   emitBranch(ContBlock);
   // Emit the continuation block for code after the if.
   emitBlock(ContBlock, CurFn, /*IsFinished=*/true);
+  return Error::success();
 }
 
 bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
@@ -7948,7 +8111,7 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
   return Builder.saveIP();
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
     const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
     Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
     AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
@@ -7969,8 +8132,11 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
            "OpenMP atomic does not support LT or GT operations");
   });
 
-  emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
-                   X.IsVolatile, IsXBinopExpr);
+  Expected<std::pair<Value *, Value *>> AtomicResult =
+      emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
+                       X.IsVolatile, IsXBinopExpr);
+  if (!AtomicResult)
+    return AtomicResult.takeError();
   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
   return Builder.saveIP();
 }
@@ -8010,7 +8176,7 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
   llvm_unreachable("Unsupported atomic update operation");
 }
 
-std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
+Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
     InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
     AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
     AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
@@ -8072,7 +8238,10 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
     llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
     PHI->addIncoming(AtomicLoadRes.first, CurBB);
     Value *OldExprVal = PHI;
-    Value *Upd = UpdateOp(OldExprVal, Builder);
+    Expected<Value *> CBResult = UpdateOp(OldExprVal, Builder);
+    if (!CBResult)
+      return CBResult.takeError();
+    Value *Upd = *CBResult;
     Builder.CreateStore(Upd, NewAtomicAddr);
     AtomicOrdering Failure =
         llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
@@ -8129,7 +8298,10 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
       }
     }
 
-    Value *Upd = UpdateOp(OldExprVal, Builder);
+    Expected<Value *> CBResult = UpdateOp(OldExprVal, Builder);
+    if (!CBResult)
+      return CBResult.takeError();
+    Value *Upd = *CBResult;
     Builder.CreateStore(Upd, NewAtomicAddr);
     LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
     AtomicOrdering Failure =
@@ -8158,7 +8330,7 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
   return Res;
 }
 
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
     const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
     AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
     AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
@@ -8181,11 +8353,13 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
   // If UpdateExpr is 'x' updated with some `expr` not based on 'x',
   // 'x' is simply atomically rewritten with 'expr'.
   AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
-  std::pair<Value *, Value *> Result =
+  Expected<std::pair<Value *, Value *>> AtomicResult =
       emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
                        X.IsVolatile, IsXBinopExpr);
-
-  Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
+  if (!AtomicResult)
+    return AtomicResult.takeError();
+  Value *CapturedVal =
+      (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
   Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
 
   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
@@ -8380,7 +8554,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
   return Builder.saveIP();
 }
 
-OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::InsertPointOrErrorTy
 OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower,
                              Value *NumTeamsUpper, Value *ThreadLimit,
@@ -8463,7 +8637,8 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
   // Generate the body of teams.
   InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin());
   InsertPointTy CodeGenIP(BodyBB, BodyBB->begin());
-  BodyGenCB(AllocaIP, CodeGenIP);
+  if (Error Err = BodyGenCB(AllocaIP, CodeGenIP))
+    return Err;
 
   OutlineInfo OI;
   OI.EntryBB = AllocaBB;
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index bb03c9290e4cf41..73882fbc7a251ab 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -37,6 +37,7 @@
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/Support/AMDGPUAddrSpace.h"
 #include "llvm/Support/CommandLine.h"
@@ -1301,6 +1302,10 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
             (Name.consume_front("local") || Name.consume_front("shared") ||
              Name.consume_front("global") || Name.consume_front("constant")) &&
             Name.starts_with(".to.gen");
+      else if (Name.consume_front("ldg.global."))
+        // nvvm.ldg.global.{i,p,f}
+        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
+                  Name.starts_with("p."));
       else
         Expand = false;
 
@@ -2363,6 +2368,15 @@ static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
                Name.consume_front("constant")) &&
               Name.starts_with(".to.gen"))) {
     Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
+  } else if (Name.consume_front("ldg.global")) {
+    Value *Ptr = CI->getArgOperand(0);
+    Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
+    // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
+    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
+    Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
+    MDNode *MD = MDNode::get(Builder.getContext(), {});
+    LD->setMetadata(LLVMContext::MD_invariant_load, MD);
+    return LD;
   } else {
     Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
     if (IID != Intrinsic::not_intrinsic &&
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 57d9a03c9c22b83..07dfbc41e79b005 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -581,26 +581,27 @@ Constant *llvm::ConstantFoldUnaryInstruction(unsigned Opcode, Constant *C) {
     case Instruction::FNeg:
       return ConstantFP::get(C->getContext(), neg(CV));
     }
-  } else if (auto *VTy = dyn_cast<FixedVectorType>(C->getType())) {
-
-    Type *Ty = IntegerType::get(VTy->getContext(), 32);
+  } else if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
     // Fast path for splatted constants.
     if (Constant *Splat = C->getSplatValue())
       if (Constant *Elt = ConstantFoldUnaryInstruction(Opcode, Splat))
         return ConstantVector::getSplat(VTy->getElementCount(), Elt);
 
-    // Fold each element and create a vector constant from those constants.
-    SmallVector<Constant *, 16> Result;
-    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
-      Constant *ExtractIdx = ConstantInt::get(Ty, i);
-      Constant *Elt = ConstantExpr::getExtractElement(C, ExtractIdx);
-      Constant *Res = ConstantFoldUnaryInstruction(Opcode, Elt);
-      if (!Res)
-        return nullptr;
-      Result.push_back(Res);
-    }
+    if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
+      // Fold each element and create a vector constant from those constants.
+      Type *Ty = IntegerType::get(FVTy->getContext(), 32);
+      SmallVector<Constant *, 16> Result;
+      for (unsigned i = 0, e = FVTy->getNumElements(); i != e; ++i) {
+        Constant *ExtractIdx = ConstantInt::get(Ty, i);
+        Constant *Elt = ConstantExpr::getExtractElement(C, ExtractIdx);
+        Constant *Res = ConstantFoldUnaryInstruction(Opcode, Elt);
+        if (!Res)
+          return nullptr;
+        Result.push_back(Res);
+      }
 
-    return ConstantVector::get(Result);
+      return ConstantVector::get(Result);
+    }
   }
 
   // We don't know how to fold this.
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index d295d1f5785eb9d..a4af0ead07cf616 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -151,7 +151,8 @@ bool DataLayout::PrimitiveSpec::operator==(const PrimitiveSpec &Other) const {
 bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const {
   return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth &&
          ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign &&
-         IndexBitWidth == Other.IndexBitWidth;
+         IndexBitWidth == Other.IndexBitWidth &&
+         IsNonIntegral == Other.IsNonIntegral;
 }
 
 namespace {
@@ -206,7 +207,8 @@ constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = {
 
 // Default pointer type specifications.
 constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = {
-    {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64} // p0:64:64:64:64
+    // p0:64:64:64:64
+    {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64, false},
 };
 
 DataLayout::DataLayout()
@@ -239,13 +241,11 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) {
   PointerSpecs = Other.PointerSpecs;
   StructABIAlignment = Other.StructABIAlignment;
   StructPrefAlignment = Other.StructPrefAlignment;
-  NonIntegralAddressSpaces = Other.NonIntegralAddressSpaces;
   return *this;
 }
 
 bool DataLayout::operator==(const DataLayout &Other) const {
   // NOTE: StringRepresentation might differ, it is not canonicalized.
-  // FIXME: NonIntegralAddressSpaces isn't compared.
   return BigEndian == Other.BigEndian &&
          AllocaAddrSpace == Other.AllocaAddrSpace &&
          ProgramAddrSpace == Other.ProgramAddrSpace &&
@@ -454,11 +454,13 @@ Error DataLayout::parsePointerSpec(StringRef Spec) {
     return createStringError(
         "index size cannot be larger than the pointer size");
 
-  setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth);
+  setPointerSpec(AddrSpace, BitWidth, ABIAlign, PrefAlign, IndexBitWidth,
+                 false);
   return Error::success();
 }
 
-Error DataLayout::parseSpecification(StringRef Spec) {
+Error DataLayout::parseSpecification(
+    StringRef Spec, SmallVectorImpl<unsigned> &NonIntegralAddressSpaces) {
   // The "ni" specifier is the only two-character specifier. Handle it first.
   if (Spec.starts_with("ni")) {
     // ni:<address space>[:<address space>]...
@@ -614,12 +616,23 @@ Error DataLayout::parseLayoutString(StringRef LayoutString) {
 
   // Split the data layout string into specifications separated by '-' and
   // parse each specification individually, updating internal data structures.
+  SmallVector<unsigned, 8> NonIntegralAddressSpaces;
   for (StringRef Spec : split(LayoutString, '-')) {
     if (Spec.empty())
       return createStringError("empty specification is not allowed");
-    if (Error Err = parseSpecification(Spec))
+    if (Error Err = parseSpecification(Spec, NonIntegralAddressSpaces))
       return Err;
   }
+  // Now mark every address space named by an "ni" specifier as non-integral.
+  // This must happen after all specifications are parsed because the textual
+  // pointer ("p") specification does not carry the non-integral property.
+  for (unsigned AS : NonIntegralAddressSpaces) {
+    // If there is no special spec for a given AS, getPointerSpec(AS) returns
+    // the spec for AS0; its values are then reused for a non-integral AS spec.
+    const PointerSpec &PS = getPointerSpec(AS);
+    setPointerSpec(AS, PS.BitWidth, PS.ABIAlign, PS.PrefAlign, PS.IndexBitWidth,
+                   true);
+  }
 
   return Error::success();
 }
@@ -666,16 +679,17 @@ DataLayout::getPointerSpec(uint32_t AddrSpace) const {
 
 void DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
                                 Align ABIAlign, Align PrefAlign,
-                                uint32_t IndexBitWidth) {
+                                uint32_t IndexBitWidth, bool IsNonIntegral) {
   auto I = lower_bound(PointerSpecs, AddrSpace, LessPointerAddrSpace());
   if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) {
     PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign,
-                                       IndexBitWidth});
+                                       IndexBitWidth, IsNonIntegral});
   } else {
     I->BitWidth = BitWidth;
     I->ABIAlign = ABIAlign;
     I->PrefAlign = PrefAlign;
     I->IndexBitWidth = IndexBitWidth;
+    I->IsNonIntegral = IsNonIntegral;
   }
 }
 
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 50b29ae4f41676a..e20a0f053481ed3 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -1799,6 +1799,47 @@ void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc) {
     unwrap<Instruction>(Inst)->setDebugLoc(DebugLoc());
 }
 
+LLVMMetadataRef LLVMDIBuilderCreateLabel(
+    LLVMDIBuilderRef Builder,
+    LLVMMetadataRef Context, const char *Name, size_t NameLen,
+    LLVMMetadataRef File, unsigned LineNo, LLVMBool AlwaysPreserve) {
+  return wrap(unwrap(Builder)->createLabel(
+    unwrapDI<DIScope>(Context), StringRef(Name, NameLen),
+    unwrapDI<DIFile>(File), LineNo, AlwaysPreserve));
+}
+
+LLVMDbgRecordRef LLVMDIBuilderInsertLabelBefore(
+    LLVMDIBuilderRef Builder, LLVMMetadataRef LabelInfo,
+    LLVMMetadataRef Location, LLVMValueRef InsertBefore) {
+  DbgInstPtr DbgInst = unwrap(Builder)->insertLabel(
+    unwrapDI<DILabel>(LabelInfo), unwrapDI<DILocation>(Location),
+    unwrap<Instruction>(InsertBefore));
+  // This assert will fail if the module is in the old debug info format.
+  // This function should only be called if the module is in the new
+  // debug info format.
+  // See https://llvm.org/docs/RemoveDIsDebugInfo.html#c-api-changes,
+  // LLVMIsNewDbgInfoFormat, and LLVMSetIsNewDbgInfoFormat for more info.
+  assert(isa<DbgRecord *>(DbgInst) &&
+         "Function unexpectedly in old debug info format");
+  return wrap(cast<DbgRecord *>(DbgInst));
+}
+
+LLVMDbgRecordRef LLVMDIBuilderInsertLabelAtEnd(
+    LLVMDIBuilderRef Builder, LLVMMetadataRef LabelInfo,
+    LLVMMetadataRef Location, LLVMBasicBlockRef InsertAtEnd) {
+  DbgInstPtr DbgInst = unwrap(Builder)->insertLabel(
+    unwrapDI<DILabel>(LabelInfo), unwrapDI<DILocation>(Location),
+    unwrap(InsertAtEnd));
+  // This assert will fail if the module is in the old debug info format.
+  // This function should only be called if the module is in the new
+  // debug info format.
+  // See https://llvm.org/docs/RemoveDIsDebugInfo.html#c-api-changes,
+  // LLVMIsNewDbgInfoFormat, and LLVMSetIsNewDbgInfoFormat for more info.
+  assert(isa<DbgRecord *>(DbgInst) &&
+         "Function unexpectedly in old debug info format");
+  return wrap(cast<DbgRecord *>(DbgInst));
+}
+
 LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata) {
   switch(unwrap(Metadata)->getMetadataID()) {
 #define HANDLE_METADATA_LEAF(CLASS) \
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index fb4f33a021a96bc..a51f9124af04dad 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -24,61 +24,185 @@ namespace {
 // by the MergeFunctions pass.
 
 class StructuralHashImpl {
-  uint64_t Hash = 4;
+  stable_hash Hash = 4;
 
-  void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
+  bool DetailedHash;
 
-  // This will produce different values on 32-bit and 64-bit systens as
-  // hash_combine returns a size_t. However, this is only used for
-  // detailed hashing which, in-tree, only needs to distinguish between
-  // differences in functions.
-  template <typename T> void hashArbitaryType(const T &V) {
-    hash(hash_combine(V));
-  }
+  // This random value acts as a block header, as otherwise the partition of
+  // opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
+  static constexpr stable_hash BlockHeaderHash = 45798;
+  static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
+  static constexpr stable_hash GlobalHeaderHash = 23456;
+
+  /// IgnoreOp is a function that returns true if the operand should be ignored.
+  IgnoreOperandFunc IgnoreOp = nullptr;
+  /// A mapping from instruction indices to instruction pointers.
+  /// The index represents the position of an instruction based on the order in
+  /// which it is first encountered.
+  std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
+  /// A mapping from pairs of instruction indices and operand indices
+  /// to the hashes of the operands.
+  std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;
+
+  /// Assign a unique ID to each Value in the order they are first seen.
+  DenseMap<const Value *, int> ValueToId;
 
-  void hashType(Type *ValueType) {
-    hash(ValueType->getTypeID());
+  stable_hash hashType(Type *ValueType) {
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(ValueType->getTypeID());
     if (ValueType->isIntegerTy())
-      hash(ValueType->getIntegerBitWidth());
+      Hashes.emplace_back(ValueType->getIntegerBitWidth());
+    return stable_hash_combine(Hashes);
   }
 
 public:
-  StructuralHashImpl() = default;
-
-  void updateOperand(Value *Operand) {
-    hashType(Operand->getType());
-
-    // The cases enumerated below are not exhaustive and are only aimed to
-    // get decent coverage over the function.
-    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
-      hashArbitaryType(ConstInt->getValue());
-    } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
-      hashArbitaryType(ConstFP->getValue());
-    } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
-      hash(Arg->getArgNo());
-    } else if (Function *Func = dyn_cast<Function>(Operand)) {
-      // Hashing the name will be deterministic as LLVM's hashing infrastructure
-      // has explicit support for hashing strings and will not simply hash
-      // the pointer.
-      hashArbitaryType(Func->getName());
+  StructuralHashImpl() = delete;
+  explicit StructuralHashImpl(bool DetailedHash,
+                              IgnoreOperandFunc IgnoreOp = nullptr)
+      : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
+    if (IgnoreOp) {
+      IndexInstruction = std::make_unique<IndexInstrMap>();
+      IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
     }
   }
 
-  void updateInstruction(const Instruction &Inst, bool DetailedHash) {
-    hash(Inst.getOpcode());
+  stable_hash hashAPInt(const APInt &I) {
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(I.getBitWidth());
+    auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
+    Hashes.append(RawVals.begin(), RawVals.end());
+    return stable_hash_combine(Hashes);
+  }
+
+  stable_hash hashAPFloat(const APFloat &F) {
+    return hashAPInt(F.bitcastToAPInt());
+  }
+
+  stable_hash hashGlobalValue(const GlobalValue *GV) {
+    if (!GV->hasName())
+      return 0;
+    return stable_hash_name(GV->getName());
+  }
+
+  // Compute a hash for a Constant. This function is logically similar to
+  // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
+  // we're interested in computing a hash rather than comparing two Constants.
+  // Some of the logic is simplified, e.g., we don't expand GEPOperator.
+  stable_hash hashConstant(Constant *C) {
+    SmallVector<stable_hash> Hashes;
+
+    Type *Ty = C->getType();
+    Hashes.emplace_back(hashType(Ty));
+
+    if (C->isNullValue()) {
+      Hashes.emplace_back(static_cast<stable_hash>('N'));
+      return stable_hash_combine(Hashes);
+    }
+
+    if (auto *G = dyn_cast<GlobalValue>(C)) {
+      Hashes.emplace_back(hashGlobalValue(G));
+      return stable_hash_combine(Hashes);
+    }
+
+    if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
+      Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
+      return stable_hash_combine(Hashes);
+    }
+
+    switch (C->getValueID()) {
+    case Value::ConstantIntVal: {
+      const APInt &Int = cast<ConstantInt>(C)->getValue();
+      Hashes.emplace_back(hashAPInt(Int));
+      return stable_hash_combine(Hashes);
+    }
+    case Value::ConstantFPVal: {
+      const APFloat &APF = cast<ConstantFP>(C)->getValueAPF();
+      Hashes.emplace_back(hashAPFloat(APF));
+      return stable_hash_combine(Hashes);
+    }
+    case Value::ConstantArrayVal:
+    case Value::ConstantStructVal:
+    case Value::ConstantVectorVal:
+    case Value::ConstantExprVal: {
+      for (const auto &Op : C->operands()) {
+        auto H = hashConstant(cast<Constant>(Op));
+        Hashes.emplace_back(H);
+      }
+      return stable_hash_combine(Hashes);
+    }
+    case Value::BlockAddressVal: {
+      const BlockAddress *BA = cast<BlockAddress>(C);
+      auto H = hashGlobalValue(BA->getFunction());
+      Hashes.emplace_back(H);
+      return stable_hash_combine(Hashes);
+    }
+    case Value::DSOLocalEquivalentVal: {
+      const auto *Equiv = cast<DSOLocalEquivalent>(C);
+      auto H = hashGlobalValue(Equiv->getGlobalValue());
+      Hashes.emplace_back(H);
+      return stable_hash_combine(Hashes);
+    }
+    default:
+      // Skip other types of constants for simplicity.
+      return stable_hash_combine(Hashes);
+    }
+  }
+
+  stable_hash hashValue(Value *V) {
+    // Check constant and return its hash.
+    Constant *C = dyn_cast<Constant>(V);
+    if (C)
+      return hashConstant(C);
+
+    // Hash argument number.
+    SmallVector<stable_hash> Hashes;
+    if (Argument *Arg = dyn_cast<Argument>(V))
+      Hashes.emplace_back(Arg->getArgNo());
+
+    // Get an index (an insertion order) for the non-constant value.
+    auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size());
+    Hashes.emplace_back(It->second);
+
+    return stable_hash_combine(Hashes);
+  }
+
+  stable_hash hashOperand(Value *Operand) {
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(hashType(Operand->getType()));
+    Hashes.emplace_back(hashValue(Operand));
+    return stable_hash_combine(Hashes);
+  }
+
+  stable_hash hashInstruction(const Instruction &Inst) {
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(Inst.getOpcode());
 
     if (!DetailedHash)
-      return;
+      return stable_hash_combine(Hashes);
 
-    hashType(Inst.getType());
+    Hashes.emplace_back(hashType(Inst.getType()));
 
     // Handle additional properties of specific instructions that cause
     // semantic differences in the IR.
     if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
-      hash(ComparisonInstruction->getPredicate());
+      Hashes.emplace_back(ComparisonInstruction->getPredicate());
+
+    unsigned InstIdx = 0;
+    if (IndexInstruction) {
+      InstIdx = IndexInstruction->size();
+      IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst));
+    }
+
+    for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
+      auto OpndHash = hashOperand(Op);
+      if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
+        assert(IndexOperandHashMap);
+        IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash);
+      } else
+        Hashes.emplace_back(OpndHash);
+    }
 
-    for (const auto &Op : Inst.operands())
-      updateOperand(Op);
+    return stable_hash_combine(Hashes);
   }
 
   // A function hash is calculated by considering only the number of arguments
@@ -97,15 +221,17 @@ class StructuralHashImpl {
   // expensive checks for pass modification status). When modifying this
   // function, most changes should be gated behind an option and enabled
   // selectively.
-  void update(const Function &F, bool DetailedHash) {
+  void update(const Function &F) {
     // Declarations don't affect analyses.
     if (F.isDeclaration())
       return;
 
-    hash(0x62642d6b6b2d6b72); // Function header
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(Hash);
+    Hashes.emplace_back(FunctionHeaderHash);
 
-    hash(F.isVarArg());
-    hash(F.arg_size());
+    Hashes.emplace_back(F.isVarArg());
+    Hashes.emplace_back(F.arg_size());
 
     SmallVector<const BasicBlock *, 8> BBs;
     SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
@@ -118,17 +244,17 @@ class StructuralHashImpl {
     while (!BBs.empty()) {
       const BasicBlock *BB = BBs.pop_back_val();
 
-      // This random value acts as a block header, as otherwise the partition of
-      // opcodes into BBs wouldn't affect the hash, only the order of the
-      // opcodes
-      hash(45798);
+      Hashes.emplace_back(BlockHeaderHash);
       for (auto &Inst : *BB)
-        updateInstruction(Inst, DetailedHash);
+        Hashes.emplace_back(hashInstruction(Inst));
 
       for (const BasicBlock *Succ : successors(BB))
         if (VisitedBBs.insert(Succ).second)
           BBs.push_back(Succ);
     }
+
+    // Update the combined hash in place.
+    Hash = stable_hash_combine(Hashes);
   }
 
   void update(const GlobalVariable &GV) {
@@ -137,30 +263,52 @@ class StructuralHashImpl {
     // we ignore anything with the `.llvm` prefix
     if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
       return;
-    hash(23456); // Global header
-    hash(GV.getValueType()->getTypeID());
+    SmallVector<stable_hash> Hashes;
+    Hashes.emplace_back(Hash);
+    Hashes.emplace_back(GlobalHeaderHash);
+    Hashes.emplace_back(GV.getValueType()->getTypeID());
+
+    // Update the combined hash in place.
+    Hash = stable_hash_combine(Hashes);
   }
 
-  void update(const Module &M, bool DetailedHash) {
+  void update(const Module &M) {
     for (const GlobalVariable &GV : M.globals())
       update(GV);
     for (const Function &F : M)
-      update(F, DetailedHash);
+      update(F);
   }
 
   uint64_t getHash() const { return Hash; }
+
+  std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
+    return std::move(IndexInstruction);
+  }
+
+  std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
+    return std::move(IndexOperandHashMap);
+  }
 };
 
 } // namespace
 
-IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
-  StructuralHashImpl H;
-  H.update(F, DetailedHash);
+stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
+  StructuralHashImpl H(DetailedHash);
+  H.update(F);
   return H.getHash();
 }
 
-IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
-  StructuralHashImpl H;
-  H.update(M, DetailedHash);
+stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
+  StructuralHashImpl H(DetailedHash);
+  H.update(M);
   return H.getHash();
 }
+
+FunctionHashInfo
+llvm::StructuralHashWithDifferences(const Function &F,
+                                    IgnoreOperandFunc IgnoreOp) {
+  StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp);
+  H.update(F);
+  return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
+                          H.getIndexPairOpndHashMap());
+}
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index f618263f79c3133..e311cde415174a9 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -58,16 +58,19 @@ bool Type::isIntegerTy(unsigned Bitwidth) const {
   return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
 }
 
-bool Type::isScalableTy() const {
+bool Type::isScalableTy(SmallPtrSetImpl<const Type *> &Visited) const {
   if (const auto *ATy = dyn_cast<ArrayType>(this))
-    return ATy->getElementType()->isScalableTy();
-  if (const auto *STy = dyn_cast<StructType>(this)) {
-    SmallPtrSet<Type *, 4> Visited;
-    return STy->containsScalableVectorType(&Visited);
-  }
+    return ATy->getElementType()->isScalableTy(Visited);
+  if (const auto *STy = dyn_cast<StructType>(this))
+    return STy->isScalableTy(Visited);
   return getTypeID() == ScalableVectorTyID || isScalableTargetExtTy();
 }
 
+bool Type::isScalableTy() const {
+  SmallPtrSet<const Type *, 4> Visited;
+  return isScalableTy(Visited);
+}
+
 const fltSemantics &Type::getFltSemantics() const {
   switch (getTypeID()) {
   case HalfTyID: return APFloat::IEEEhalf();
@@ -394,30 +397,22 @@ StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
   return ST;
 }
 
-bool StructType::containsScalableVectorType(
-    SmallPtrSetImpl<Type *> *Visited) const {
+bool StructType::isScalableTy(SmallPtrSetImpl<const Type *> &Visited) const {
   if ((getSubclassData() & SCDB_ContainsScalableVector) != 0)
     return true;
 
   if ((getSubclassData() & SCDB_NotContainsScalableVector) != 0)
     return false;
 
-  if (Visited && !Visited->insert(const_cast<StructType *>(this)).second)
+  if (!Visited.insert(this).second)
     return false;
 
   for (Type *Ty : elements()) {
-    if (isa<ScalableVectorType>(Ty)) {
+    if (Ty->isScalableTy(Visited)) {
       const_cast<StructType *>(this)->setSubclassData(
           getSubclassData() | SCDB_ContainsScalableVector);
       return true;
     }
-    if (auto *STy = dyn_cast<StructType>(Ty)) {
-      if (STy->containsScalableVectorType(Visited)) {
-        const_cast<StructType *>(this)->setSubclassData(
-            getSubclassData() | SCDB_ContainsScalableVector);
-        return true;
-      }
-    }
   }
 
   // For structures that are opaque, return false but do not set the
@@ -839,6 +834,14 @@ Expected<TargetExtType *> TargetExtType::checkParams(TargetExtType *TTy) {
         "target extension type riscv.vector.tuple should have one "
         "type parameter and one integer parameter");
 
+  // Opaque types in the AMDGPU name space.
+  if (TTy->Name == "amdgcn.named.barrier" &&
+      (TTy->getNumTypeParameters() != 0 || TTy->getNumIntParameters() != 1)) {
+    return createStringError("target extension type amdgcn.named.barrier "
+                             "should have no type parameters "
+                             "and one integer parameter");
+  }
+
   return TTy;
 }
 
@@ -884,6 +887,12 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
   if (Name.starts_with("dx."))
     return TargetTypeInfo(PointerType::get(C, 0));
 
+  // Opaque types in the AMDGPU name space.
+  if (Name == "amdgcn.named.barrier") {
+    return TargetTypeInfo(FixedVectorType::get(Type::getInt32Ty(C), 4),
+                          TargetExtType::CanBeGlobal);
+  }
+
   return TargetTypeInfo(Type::getVoidTy(C));
 }
 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 4eae2a0eb110b56..d7270f2c32b3aee 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -2235,9 +2235,9 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
   }
 
   Check(!(Attrs.hasFnAttr(Attribute::SanitizeRealtime) &&
-          Attrs.hasFnAttr(Attribute::SanitizeRealtimeUnsafe)),
+          Attrs.hasFnAttr(Attribute::SanitizeRealtimeBlocking)),
         "Attributes "
-        "'sanitize_realtime and sanitize_realtime_unsafe' are incompatible!",
+        "'sanitize_realtime and sanitize_realtime_blocking' are incompatible!",
         V);
 
   if (Attrs.hasFnAttr(Attribute::OptimizeForDebugging)) {
@@ -4107,8 +4107,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   Check(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP);
 
   if (auto *STy = dyn_cast<StructType>(GEP.getSourceElementType())) {
-    SmallPtrSet<Type *, 4> Visited;
-    Check(!STy->containsScalableVectorType(&Visited),
+    Check(!STy->isScalableTy(),
           "getelementptr cannot target structure that contains scalable vector"
           "type",
           &GEP);
@@ -4122,8 +4121,9 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs);
   Check(ElTy, "Invalid indices for GEP pointer type!", &GEP);
 
-  Check(GEP.getType()->isPtrOrPtrVectorTy() &&
-            GEP.getResultElementType() == ElTy,
+  PointerType *PtrTy = dyn_cast<PointerType>(GEP.getType()->getScalarType());
+
+  Check(PtrTy && GEP.getResultElementType() == ElTy,
         "GEP is not of right type for indices!", &GEP, ElTy);
 
   if (auto *GEPVTy = dyn_cast<VectorType>(GEP.getType())) {
@@ -4145,10 +4145,8 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     }
   }
 
-  if (auto *PTy = dyn_cast<PointerType>(GEP.getType())) {
-    Check(GEP.getAddressSpace() == PTy->getAddressSpace(),
-          "GEP address space doesn't match type", &GEP);
-  }
+  Check(GEP.getAddressSpace() == PtrTy->getAddressSpace(),
+        "GEP address space doesn't match type", &GEP);
 
   visitInstruction(GEP);
 }
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 31b519a3e5c56a0..b9ad0b4eac9c7ba 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -373,6 +373,7 @@ class MCAsmStreamer final : public MCStreamer {
                        SMLoc Loc) override;
   void emitCFIWindowSave(SMLoc Loc) override;
   void emitCFINegateRAState(SMLoc Loc) override;
+  void emitCFINegateRAStateWithPC(SMLoc Loc) override;
   void emitCFIReturnColumn(int64_t Register) override;
   void emitCFILabelDirective(SMLoc Loc, StringRef Name) override;
 
@@ -2145,6 +2146,12 @@ void MCAsmStreamer::emitCFINegateRAState(SMLoc Loc) {
   EmitEOL();
 }
 
+void MCAsmStreamer::emitCFINegateRAStateWithPC(SMLoc Loc) {
+  MCStreamer::emitCFINegateRAStateWithPC(Loc);
+  OS << "\t.cfi_negate_ra_state_with_pc";
+  EmitEOL();
+}
+
 void MCAsmStreamer::emitCFIReturnColumn(int64_t Register) {
   MCStreamer::emitCFIReturnColumn(Register);
   OS << "\t.cfi_return_column ";
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index 8ff097f29aebd15..e058358fb8ad4bd 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -1381,6 +1381,10 @@ void FrameEmitterImpl::emitCFIInstruction(const MCCFIInstruction &Instr) {
     Streamer.emitInt8(dwarf::DW_CFA_AARCH64_negate_ra_state);
     return;
 
+  case MCCFIInstruction::OpNegateRAStateWithPC:
+    Streamer.emitInt8(dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc);
+    return;
+
   case MCCFIInstruction::OpUndefined: {
     unsigned Reg = Instr.getRegister();
     Streamer.emitInt8(dwarf::DW_CFA_undefined);
diff --git a/llvm/lib/MC/MCInstPrinter.cpp b/llvm/lib/MC/MCInstPrinter.cpp
index e4faeba04a8fd7f..069716a3ecf9b70 100644
--- a/llvm/lib/MC/MCInstPrinter.cpp
+++ b/llvm/lib/MC/MCInstPrinter.cpp
@@ -43,7 +43,7 @@ StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const {
   return MII.getName(Opcode);
 }
 
-void MCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void MCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   llvm_unreachable("Target should implement this");
 }
 
@@ -224,29 +224,32 @@ format_object<uint64_t> MCInstPrinter::formatHex(uint64_t Value) const {
   llvm_unreachable("unsupported print style");
 }
 
-MCInstPrinter::WithMarkup MCInstPrinter::markup(raw_ostream &OS,
-                                                Markup S) const {
-  return WithMarkup(OS, S, getUseMarkup(), getUseColor());
+MCInstPrinter::WithMarkup MCInstPrinter::markup(raw_ostream &OS, Markup S) {
+  return WithMarkup(*this, OS, S, getUseMarkup(), getUseColor());
 }
 
-MCInstPrinter::WithMarkup::WithMarkup(raw_ostream &OS, Markup M,
-                                      bool EnableMarkup, bool EnableColor)
-    : OS(OS), EnableMarkup(EnableMarkup), EnableColor(EnableColor) {
+MCInstPrinter::WithMarkup::WithMarkup(MCInstPrinter &IP, raw_ostream &OS,
+                                      Markup M, bool EnableMarkup,
+                                      bool EnableColor)
+    : IP(IP), OS(OS), EnableMarkup(EnableMarkup), EnableColor(EnableColor) {
   if (EnableColor) {
+    raw_ostream::Colors Color = raw_ostream::Colors::RESET;
     switch (M) {
     case Markup::Immediate:
-      OS.changeColor(raw_ostream::RED);
+      Color = raw_ostream::RED;
       break;
     case Markup::Register:
-      OS.changeColor(raw_ostream::CYAN);
+      Color = raw_ostream::CYAN;
       break;
     case Markup::Target:
-      OS.changeColor(raw_ostream::YELLOW);
+      Color = raw_ostream::YELLOW;
       break;
     case Markup::Memory:
-      OS.changeColor(raw_ostream::GREEN);
+      Color = raw_ostream::GREEN;
       break;
     }
+    IP.ColorStack.push_back(Color);
+    OS.changeColor(Color);
   }
 
   if (EnableMarkup) {
@@ -270,6 +273,8 @@ MCInstPrinter::WithMarkup::WithMarkup(raw_ostream &OS, Markup M,
 MCInstPrinter::WithMarkup::~WithMarkup() {
   if (EnableMarkup)
     OS << '>';
-  if (EnableColor)
-    OS.resetColor();
+  if (!EnableColor)
+    return;
+  IP.ColorStack.pop_back();
+  OS << IP.ColorStack.back();
 }
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 4774e5112af535f..ecccb228c8c3875 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -264,7 +264,7 @@ class AsmParser : public MCAsmParser {
                         SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
                         SmallVectorImpl<std::string> &Constraints,
                         SmallVectorImpl<std::string> &Clobbers,
-                        const MCInstrInfo *MII, const MCInstPrinter *IP,
+                        const MCInstrInfo *MII, MCInstPrinter *IP,
                         MCAsmParserSemaCallback &SI) override;
 
   bool parseExpression(const MCExpr *&Res);
@@ -6006,7 +6006,7 @@ bool AsmParser::parseMSInlineAsm(
     SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
     SmallVectorImpl<std::string> &Constraints,
     SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
-    const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
+    MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
   SmallVector<void *, 4> InputDecls;
   SmallVector<void *, 4> OutputDecls;
   SmallVector<bool, 4> InputDeclsAddressOf;
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 9cdc2aafe3d2241..b58210b3c268e99 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -163,7 +163,7 @@ class ELFAsmParser : public MCAsmParserExtension {
 
 } // end anonymous namespace
 
-/// ParseDirectiveSymbolAttribute
+/// parseDirectiveSymbolAttribute
 ///  ::= { ".local", ".weak", ... } [ identifier ( , identifier )* ]
 bool ELFAsmParser::parseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
   MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive)
@@ -746,7 +746,7 @@ static MCSymbolAttr MCAttrForString(StringRef Type) {
           .Default(MCSA_Invalid);
 }
 
-/// ParseDirectiveELFType
+/// parseDirectiveType
 ///  ::= .type identifier , STT_<TYPE_IN_UPPER_CASE>
 ///  ::= .type identifier , #attribute
 ///  ::= .type identifier , @attribute
@@ -803,7 +803,7 @@ bool ELFAsmParser::parseDirectiveType(StringRef, SMLoc) {
   return false;
 }
 
-/// ParseDirectiveIdent
+/// parseDirectiveIdent
 ///  ::= .ident string
 bool ELFAsmParser::parseDirectiveIdent(StringRef, SMLoc) {
   if (getLexer().isNot(AsmToken::String))
@@ -821,7 +821,7 @@ bool ELFAsmParser::parseDirectiveIdent(StringRef, SMLoc) {
   return false;
 }
 
-/// ParseDirectiveSymver
+/// parseDirectiveSymver
 ///  ::= .symver foo, bar2@zed
 bool ELFAsmParser::parseDirectiveSymver(StringRef, SMLoc) {
   StringRef OriginalName, Name, Action;
@@ -858,7 +858,7 @@ bool ELFAsmParser::parseDirectiveSymver(StringRef, SMLoc) {
   return false;
 }
 
-/// ParseDirectiveVersion
+/// parseDirectiveVersion
 ///  ::= .version string
 bool ELFAsmParser::parseDirectiveVersion(StringRef, SMLoc) {
   if (getLexer().isNot(AsmToken::String))
@@ -882,7 +882,7 @@ bool ELFAsmParser::parseDirectiveVersion(StringRef, SMLoc) {
   return false;
 }
 
-/// ParseDirectiveWeakref
+/// parseDirectiveWeakref
 ///  ::= .weakref foo, bar
 bool ELFAsmParser::parseDirectiveWeakref(StringRef, SMLoc) {
   // FIXME: Share code with the other alias building directives.
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index d88fd09a1aa07c6..a7f37d81f640929 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -539,7 +539,7 @@ class MasmParser : public MCAsmParser {
                         SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
                         SmallVectorImpl<std::string> &Constraints,
                         SmallVectorImpl<std::string> &Clobbers,
-                        const MCInstrInfo *MII, const MCInstPrinter *IP,
+                        const MCInstrInfo *MII, MCInstPrinter *IP,
                         MCAsmParserSemaCallback &SI) override;
 
   bool parseExpression(const MCExpr *&Res);
@@ -7340,7 +7340,7 @@ bool MasmParser::parseMSInlineAsm(
     SmallVectorImpl<std::pair<void *, bool>> &OpDecls,
     SmallVectorImpl<std::string> &Constraints,
     SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII,
-    const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
+    MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {
   SmallVector<void *, 4> InputDecls;
   SmallVector<void *, 4> OutputDecls;
   SmallVector<bool, 4> InputDeclsAddressOf;
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index 13b162768578c53..5474db1315f1416 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -688,6 +688,16 @@ void MCStreamer::emitCFINegateRAState(SMLoc Loc) {
   CurFrame->Instructions.push_back(Instruction);
 }
 
+void MCStreamer::emitCFINegateRAStateWithPC(SMLoc Loc) {
+  MCSymbol *Label = emitCFILabel();
+  MCCFIInstruction Instruction =
+      MCCFIInstruction::createNegateRAStateWithPC(Label, Loc);
+  MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
+  if (!CurFrame)
+    return;
+  CurFrame->Instructions.push_back(Instruction);
+}
+
 void MCStreamer::emitCFIReturnColumn(int64_t Register) {
   MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
   if (!CurFrame)
diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp
index 94aa1ebc8f9e116..f5b83f29352ca7f 100644
--- a/llvm/lib/MC/MCXCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCXCOFFStreamer.cpp
@@ -32,6 +32,10 @@ MCXCOFFStreamer::MCXCOFFStreamer(MCContext &Context,
     : MCObjectStreamer(Context, std::move(MAB), std::move(OW),
                        std::move(Emitter)) {}
 
+XCOFFObjectWriter &MCXCOFFStreamer::getWriter() {
+  return static_cast<XCOFFObjectWriter &>(getAssembler().getWriter());
+}
+
 bool MCXCOFFStreamer::emitSymbolAttribute(MCSymbol *Sym,
                                           MCSymbolAttr Attribute) {
   auto *Symbol = cast<MCSymbolXCOFF>(Sym);
@@ -109,14 +113,12 @@ void MCXCOFFStreamer::emitXCOFFExceptDirective(const MCSymbol *Symbol,
                                                unsigned Lang, unsigned Reason,
                                                unsigned FunctionSize,
                                                bool hasDebug) {
-  // TODO: Export XCOFFObjectWriter to llvm/MC/MCXCOFFObjectWriter.h and access
-  // it from MCXCOFFStreamer.
-  XCOFF::addExceptionEntry(getAssembler().getWriter(), Symbol, Trap, Lang,
-                           Reason, FunctionSize, hasDebug);
+  getWriter().addExceptionEntry(Symbol, Trap, Lang, Reason, FunctionSize,
+                                hasDebug);
 }
 
 void MCXCOFFStreamer::emitXCOFFCInfoSym(StringRef Name, StringRef Metadata) {
-  XCOFF::addCInfoSymEntry(getAssembler().getWriter(), Name, Metadata);
+  getWriter().addCInfoSymEntry(Name, Metadata);
 }
 
 void MCXCOFFStreamer::emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index c7f29c73eaac093..5d8f3dbdaadad54 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -290,8 +290,7 @@ struct CInfoSymSectionEntry : public SectionEntry {
   }
 };
 
-class XCOFFObjectWriter : public MCObjectWriter {
-
+class XCOFFWriter final : public XCOFFObjectWriter {
   uint32_t SymbolTableEntryCount = 0;
   uint64_t SymbolTableOffset = 0;
   uint16_t SectionCount = 0;
@@ -433,8 +432,8 @@ class XCOFFObjectWriter : public MCObjectWriter {
   }
 
 public:
-  XCOFFObjectWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
-                    raw_pwrite_stream &OS);
+  XCOFFWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
+              raw_pwrite_stream &OS);
 
   void writeWord(uint64_t Word) {
     is64Bit() ? W.write<uint64_t>(Word) : W.write<uint32_t>(Word);
@@ -442,12 +441,12 @@ class XCOFFObjectWriter : public MCObjectWriter {
 
   void addExceptionEntry(const MCSymbol *Symbol, const MCSymbol *Trap,
                          unsigned LanguageCode, unsigned ReasonCode,
-                         unsigned FunctionSize, bool hasDebug);
-  void addCInfoSymEntry(StringRef Name, StringRef Metadata);
+                         unsigned FunctionSize, bool hasDebug) override;
+  void addCInfoSymEntry(StringRef Name, StringRef Metadata) override;
 };
 
-XCOFFObjectWriter::XCOFFObjectWriter(
-    std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)
+XCOFFWriter::XCOFFWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
+                         raw_pwrite_stream &OS)
     : W(OS, llvm::endianness::big), TargetObjectWriter(std::move(MOTW)),
       Strings(StringTableBuilder::XCOFF),
       Text(".text", XCOFF::STYP_TEXT, /* IsVirtual */ false,
@@ -463,7 +462,7 @@ XCOFFObjectWriter::XCOFFObjectWriter(
       ExceptionSection(".except", XCOFF::STYP_EXCEPT),
       CInfoSymSection(".info", XCOFF::STYP_INFO) {}
 
-void XCOFFObjectWriter::reset() {
+void XCOFFWriter::reset() {
   // Clear the mappings we created.
   SymbolIndexMap.clear();
   SectionMap.clear();
@@ -479,7 +478,7 @@ void XCOFFObjectWriter::reset() {
   ExceptionSection.reset();
   CInfoSymSection.reset();
 
-  // Reset states in XCOFFObjectWriter.
+  // Reset states in XCOFFWriter.
   SymbolTableEntryCount = 0;
   SymbolTableOffset = 0;
   SectionCount = 0;
@@ -489,7 +488,7 @@ void XCOFFObjectWriter::reset() {
   MCObjectWriter::reset();
 }
 
-CsectGroup &XCOFFObjectWriter::getCsectGroup(const MCSectionXCOFF *MCSec) {
+CsectGroup &XCOFFWriter::getCsectGroup(const MCSectionXCOFF *MCSec) {
   switch (MCSec->getMappingClass()) {
   case XCOFF::XMC_PR:
     assert(XCOFF::XTY_SD == MCSec->getCSectType() &&
@@ -556,7 +555,7 @@ static MCSectionXCOFF *getContainingCsect(const MCSymbolXCOFF *XSym) {
   return XSym->getRepresentedCsect();
 }
 
-void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm) {
+void XCOFFWriter::executePostLayoutBinding(MCAssembler &Asm) {
   for (const auto &S : Asm) {
     const auto *MCSec = cast<const MCSectionXCOFF>(&S);
     assert(!SectionMap.contains(MCSec) && "Cannot add a section twice.");
@@ -657,10 +656,9 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm) {
   assignAddressesAndIndices(Asm);
 }
 
-void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
-                                         const MCFragment *Fragment,
-                                         const MCFixup &Fixup, MCValue Target,
-                                         uint64_t &FixedValue) {
+void XCOFFWriter::recordRelocation(MCAssembler &Asm, const MCFragment *Fragment,
+                                   const MCFixup &Fixup, MCValue Target,
+                                   uint64_t &FixedValue) {
   auto getIndex = [this](const MCSymbol *Sym,
                          const MCSectionXCOFF *ContainingCsect) {
     // If we could not find the symbol directly in SymbolIndexMap, this symbol
@@ -812,7 +810,7 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
   FixedValue -= getVirtualAddress(SymB, SymBSec);
 }
 
-void XCOFFObjectWriter::writeSections(const MCAssembler &Asm) {
+void XCOFFWriter::writeSections(const MCAssembler &Asm) {
   uint64_t CurrentAddressLocation = 0;
   for (const auto *Section : Sections)
     writeSectionForControlSectionEntry(Asm, *Section, CurrentAddressLocation);
@@ -824,7 +822,7 @@ void XCOFFObjectWriter::writeSections(const MCAssembler &Asm) {
                                       CurrentAddressLocation);
 }
 
-uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm) {
+uint64_t XCOFFWriter::writeObject(MCAssembler &Asm) {
   // We always emit a timestamp of 0 for reproducibility, so ensure incremental
   // linking is not enabled, in case, like with Windows COFF, such a timestamp
   // is incompatible with incremental linking of XCOFF.
@@ -844,11 +842,11 @@ uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm) {
   return W.OS.tell() - StartOffset;
 }
 
-bool XCOFFObjectWriter::nameShouldBeInStringTable(const StringRef &SymbolName) {
+bool XCOFFWriter::nameShouldBeInStringTable(const StringRef &SymbolName) {
   return SymbolName.size() > XCOFF::NameSize || is64Bit();
 }
 
-void XCOFFObjectWriter::writeSymbolName(const StringRef &SymbolName) {
+void XCOFFWriter::writeSymbolName(const StringRef &SymbolName) {
   // Magic, Offset or SymbolName.
   if (nameShouldBeInStringTable(SymbolName)) {
     W.write<int32_t>(0);
@@ -861,11 +859,10 @@ void XCOFFObjectWriter::writeSymbolName(const StringRef &SymbolName) {
   }
 }
 
-void XCOFFObjectWriter::writeSymbolEntry(StringRef SymbolName, uint64_t Value,
-                                         int16_t SectionNumber,
-                                         uint16_t SymbolType,
-                                         uint8_t StorageClass,
-                                         uint8_t NumberOfAuxEntries) {
+void XCOFFWriter::writeSymbolEntry(StringRef SymbolName, uint64_t Value,
+                                   int16_t SectionNumber, uint16_t SymbolType,
+                                   uint8_t StorageClass,
+                                   uint8_t NumberOfAuxEntries) {
   if (is64Bit()) {
     W.write<uint64_t>(Value);
     W.write<uint32_t>(Strings.getOffset(SymbolName));
@@ -879,9 +876,9 @@ void XCOFFObjectWriter::writeSymbolEntry(StringRef SymbolName, uint64_t Value,
   W.write<uint8_t>(NumberOfAuxEntries);
 }
 
-void XCOFFObjectWriter::writeSymbolAuxCsectEntry(uint64_t SectionOrLength,
-                                                 uint8_t SymbolAlignmentAndType,
-                                                 uint8_t StorageMappingClass) {
+void XCOFFWriter::writeSymbolAuxCsectEntry(uint64_t SectionOrLength,
+                                           uint8_t SymbolAlignmentAndType,
+                                           uint8_t StorageMappingClass) {
   W.write<uint32_t>(is64Bit() ? Lo_32(SectionOrLength) : SectionOrLength);
   W.write<uint32_t>(0); // ParameterHashIndex
   W.write<uint16_t>(0); // TypeChkSectNum
@@ -897,12 +894,12 @@ void XCOFFObjectWriter::writeSymbolAuxCsectEntry(uint64_t SectionOrLength,
   }
 }
 
-bool XCOFFObjectWriter::auxFileSymNameShouldBeInStringTable(
+bool XCOFFWriter::auxFileSymNameShouldBeInStringTable(
     const StringRef &SymbolName) {
   return SymbolName.size() > XCOFF::AuxFileEntNameSize;
 }
 
-void XCOFFObjectWriter::writeAuxFileSymName(const StringRef &SymbolName) {
+void XCOFFWriter::writeAuxFileSymName(const StringRef &SymbolName) {
   // Magic, Offset or SymbolName.
   if (auxFileSymNameShouldBeInStringTable(SymbolName)) {
     W.write<int32_t>(0);
@@ -916,8 +913,7 @@ void XCOFFObjectWriter::writeAuxFileSymName(const StringRef &SymbolName) {
   }
 }
 
-void XCOFFObjectWriter::writeSymbolAuxFileEntry(StringRef &Name,
-                                                uint8_t ftype) {
+void XCOFFWriter::writeSymbolAuxFileEntry(StringRef &Name, uint8_t ftype) {
   writeAuxFileSymName(Name);
   W.write<uint8_t>(ftype);
   W.OS.write_zeros(2);
@@ -927,8 +923,8 @@ void XCOFFObjectWriter::writeSymbolAuxFileEntry(StringRef &Name,
     W.OS.write_zeros(1);
 }
 
-void XCOFFObjectWriter::writeSymbolAuxDwarfEntry(
-    uint64_t LengthOfSectionPortion, uint64_t NumberOfRelocEnt) {
+void XCOFFWriter::writeSymbolAuxDwarfEntry(uint64_t LengthOfSectionPortion,
+                                           uint64_t NumberOfRelocEnt) {
   writeWord(LengthOfSectionPortion);
   if (!is64Bit())
     W.OS.write_zeros(4); // Reserved
@@ -941,7 +937,7 @@ void XCOFFObjectWriter::writeSymbolAuxDwarfEntry(
   }
 }
 
-void XCOFFObjectWriter::writeSymbolEntryForCsectMemberLabel(
+void XCOFFWriter::writeSymbolEntryForCsectMemberLabel(
     const Symbol &SymbolRef, const XCOFFSection &CSectionRef,
     int16_t SectionIndex, uint64_t SymbolOffset) {
   assert(SymbolOffset <= MaxRawDataSize - CSectionRef.Address &&
@@ -986,7 +982,7 @@ void XCOFFObjectWriter::writeSymbolEntryForCsectMemberLabel(
                            CSectionRef.MCSec->getMappingClass());
 }
 
-void XCOFFObjectWriter::writeSymbolEntryForDwarfSection(
+void XCOFFWriter::writeSymbolEntryForDwarfSection(
     const XCOFFSection &DwarfSectionRef, int16_t SectionIndex) {
   assert(DwarfSectionRef.MCSec->isDwarfSect() && "Not a DWARF section!");
 
@@ -996,7 +992,7 @@ void XCOFFObjectWriter::writeSymbolEntryForDwarfSection(
   writeSymbolAuxDwarfEntry(DwarfSectionRef.Size);
 }
 
-void XCOFFObjectWriter::writeSymbolEntryForControlSection(
+void XCOFFWriter::writeSymbolEntryForControlSection(
     const XCOFFSection &CSectionRef, int16_t SectionIndex,
     XCOFF::StorageClass StorageClass) {
   writeSymbolEntry(CSectionRef.getSymbolTableName(), CSectionRef.Address,
@@ -1006,10 +1002,10 @@ void XCOFFObjectWriter::writeSymbolEntryForControlSection(
                            CSectionRef.MCSec->getMappingClass());
 }
 
-void XCOFFObjectWriter::writeSymbolAuxFunctionEntry(uint32_t EntryOffset,
-                                                    uint32_t FunctionSize,
-                                                    uint64_t LineNumberPointer,
-                                                    uint32_t EndIndex) {
+void XCOFFWriter::writeSymbolAuxFunctionEntry(uint32_t EntryOffset,
+                                              uint32_t FunctionSize,
+                                              uint64_t LineNumberPointer,
+                                              uint32_t EndIndex) {
   if (is64Bit())
     writeWord(LineNumberPointer);
   else
@@ -1026,9 +1022,9 @@ void XCOFFObjectWriter::writeSymbolAuxFunctionEntry(uint32_t EntryOffset,
   }
 }
 
-void XCOFFObjectWriter::writeSymbolAuxExceptionEntry(uint64_t EntryOffset,
-                                                     uint32_t FunctionSize,
-                                                     uint32_t EndIndex) {
+void XCOFFWriter::writeSymbolAuxExceptionEntry(uint64_t EntryOffset,
+                                               uint32_t FunctionSize,
+                                               uint32_t EndIndex) {
   assert(is64Bit() && "Exception auxilliary entries are 64-bit only.");
   W.write<uint64_t>(EntryOffset);
   W.write<uint32_t>(FunctionSize);
@@ -1037,7 +1033,7 @@ void XCOFFObjectWriter::writeSymbolAuxExceptionEntry(uint64_t EntryOffset,
   W.write<uint8_t>(XCOFF::AUX_EXCEPT);
 }
 
-void XCOFFObjectWriter::writeFileHeader() {
+void XCOFFWriter::writeFileHeader() {
   W.write<uint16_t>(is64Bit() ? XCOFF::XCOFF64 : XCOFF::XCOFF32);
   W.write<uint16_t>(SectionCount);
   W.write<int32_t>(0); // TimeStamp
@@ -1053,7 +1049,7 @@ void XCOFFObjectWriter::writeFileHeader() {
   }
 }
 
-void XCOFFObjectWriter::writeAuxFileHeader() {
+void XCOFFWriter::writeAuxFileHeader() {
   if (!auxiliaryHeaderSize())
     return;
   W.write<uint16_t>(0); // Magic
@@ -1069,7 +1065,7 @@ void XCOFFObjectWriter::writeAuxFileHeader() {
   W.write<uint32_t>(Sections[1]->Address); // DataStartAddr
 }
 
-void XCOFFObjectWriter::writeSectionHeader(const SectionEntry *Sec) {
+void XCOFFWriter::writeSectionHeader(const SectionEntry *Sec) {
   bool IsDwarf = (Sec->Flags & XCOFF::STYP_DWARF) != 0;
   bool IsOvrflo = (Sec->Flags & XCOFF::STYP_OVRFLO) != 0;
   // Nothing to write for this Section.
@@ -1109,7 +1105,7 @@ void XCOFFObjectWriter::writeSectionHeader(const SectionEntry *Sec) {
   }
 }
 
-void XCOFFObjectWriter::writeSectionHeaderTable() {
+void XCOFFWriter::writeSectionHeaderTable() {
   for (const auto *CsectSec : Sections)
     writeSectionHeader(CsectSec);
   for (const auto &DwarfSec : DwarfSections)
@@ -1122,8 +1118,8 @@ void XCOFFObjectWriter::writeSectionHeaderTable() {
     writeSectionHeader(&CInfoSymSection);
 }
 
-void XCOFFObjectWriter::writeRelocation(XCOFFRelocation Reloc,
-                                        const XCOFFSection &Section) {
+void XCOFFWriter::writeRelocation(XCOFFRelocation Reloc,
+                                  const XCOFFSection &Section) {
   if (Section.MCSec->isCsect())
     writeWord(Section.Address + Reloc.FixupOffsetInCsect);
   else {
@@ -1136,7 +1132,7 @@ void XCOFFObjectWriter::writeRelocation(XCOFFRelocation Reloc,
   W.write<uint8_t>(Reloc.Type);
 }
 
-void XCOFFObjectWriter::writeRelocations() {
+void XCOFFWriter::writeRelocations() {
   for (const auto *Section : Sections) {
     if (Section->Index == SectionEntry::UninitializedIndex)
       // Nothing to write for this Section.
@@ -1158,7 +1154,7 @@ void XCOFFObjectWriter::writeRelocations() {
       writeRelocation(Reloc, *DwarfSection.DwarfSect);
 }
 
-void XCOFFObjectWriter::writeSymbolTable(MCAssembler &Asm) {
+void XCOFFWriter::writeSymbolTable(MCAssembler &Asm) {
   // Write C_FILE symbols.
   StringRef Vers = CompilerVersion;
 
@@ -1239,8 +1235,7 @@ void XCOFFObjectWriter::writeSymbolTable(MCAssembler &Asm) {
                                     DwarfSection.Index);
 }
 
-void XCOFFObjectWriter::finalizeRelocationInfo(SectionEntry *Sec,
-                                               uint64_t RelCount) {
+void XCOFFWriter::finalizeRelocationInfo(SectionEntry *Sec, uint64_t RelCount) {
   // Handles relocation field overflows in an XCOFF32 file. An XCOFF64 file
   // may not contain an overflow section header.
   if (!is64Bit() && (RelCount >= static_cast<uint32_t>(XCOFF::RelocOverflow))) {
@@ -1265,8 +1260,8 @@ void XCOFFObjectWriter::finalizeRelocationInfo(SectionEntry *Sec,
   }
 }
 
-void XCOFFObjectWriter::calcOffsetToRelocations(SectionEntry *Sec,
-                                                uint64_t &RawPointer) {
+void XCOFFWriter::calcOffsetToRelocations(SectionEntry *Sec,
+                                          uint64_t &RawPointer) {
   if (!Sec->RelocationCount)
     return;
 
@@ -1297,7 +1292,7 @@ void XCOFFObjectWriter::calcOffsetToRelocations(SectionEntry *Sec,
     report_fatal_error("Relocation data overflowed this object file.");
 }
 
-void XCOFFObjectWriter::finalizeSectionInfo() {
+void XCOFFWriter::finalizeSectionInfo() {
   for (auto *Section : Sections) {
     if (Section->Index == SectionEntry::UninitializedIndex)
       // Nothing to record for this Section.
@@ -1361,9 +1356,10 @@ void XCOFFObjectWriter::finalizeSectionInfo() {
     SymbolTableOffset = RawPointer;
 }
 
-void XCOFFObjectWriter::addExceptionEntry(
-    const MCSymbol *Symbol, const MCSymbol *Trap, unsigned LanguageCode,
-    unsigned ReasonCode, unsigned FunctionSize, bool hasDebug) {
+void XCOFFWriter::addExceptionEntry(const MCSymbol *Symbol,
+                                    const MCSymbol *Trap, unsigned LanguageCode,
+                                    unsigned ReasonCode, unsigned FunctionSize,
+                                    bool hasDebug) {
   // If a module had debug info, debugging is enabled and XCOFF emits the
   // exception auxilliary entry.
   if (hasDebug)
@@ -1383,7 +1379,7 @@ void XCOFFObjectWriter::addExceptionEntry(
       std::pair<const StringRef, ExceptionInfo>(Symbol->getName(), NewEntry));
 }
 
-unsigned XCOFFObjectWriter::getExceptionSectionSize() {
+unsigned XCOFFWriter::getExceptionSectionSize() {
   unsigned EntryNum = 0;
 
   for (const auto &TableEntry : ExceptionSection.ExceptionTable)
@@ -1395,7 +1391,7 @@ unsigned XCOFFObjectWriter::getExceptionSectionSize() {
                                : XCOFF::ExceptionSectionEntrySize32);
 }
 
-unsigned XCOFFObjectWriter::getExceptionOffset(const MCSymbol *Symbol) {
+unsigned XCOFFWriter::getExceptionOffset(const MCSymbol *Symbol) {
   unsigned EntryNum = 0;
   for (const auto &TableEntry : ExceptionSection.ExceptionTable) {
     if (Symbol == TableEntry.second.FunctionSymbol)
@@ -1406,13 +1402,13 @@ unsigned XCOFFObjectWriter::getExceptionOffset(const MCSymbol *Symbol) {
                                : XCOFF::ExceptionSectionEntrySize32);
 }
 
-void XCOFFObjectWriter::addCInfoSymEntry(StringRef Name, StringRef Metadata) {
+void XCOFFWriter::addCInfoSymEntry(StringRef Name, StringRef Metadata) {
   assert(!CInfoSymSection.Entry && "Multiple entries are not supported");
   CInfoSymSection.addEntry(
       std::make_unique<CInfoSymInfo>(Name.str(), Metadata.str()));
 }
 
-void XCOFFObjectWriter::assignAddressesAndIndices(MCAssembler &Asm) {
+void XCOFFWriter::assignAddressesAndIndices(MCAssembler &Asm) {
   // The symbol table starts with all the C_FILE symbols. Each C_FILE symbol
   // requires 1 or 2 auxiliary entries.
   uint32_t SymbolTableIndex =
@@ -1588,7 +1584,7 @@ void XCOFFObjectWriter::assignAddressesAndIndices(MCAssembler &Asm) {
   SymbolTableEntryCount = SymbolTableIndex;
 }
 
-void XCOFFObjectWriter::writeSectionForControlSectionEntry(
+void XCOFFWriter::writeSectionForControlSectionEntry(
     const MCAssembler &Asm, const CsectSectionEntry &CsectEntry,
     uint64_t &CurrentAddressLocation) {
   // Nothing to write for this Section.
@@ -1635,7 +1631,7 @@ void XCOFFObjectWriter::writeSectionForControlSectionEntry(
   }
 }
 
-void XCOFFObjectWriter::writeSectionForDwarfSectionEntry(
+void XCOFFWriter::writeSectionForDwarfSectionEntry(
     const MCAssembler &Asm, const DwarfSectionEntry &DwarfEntry,
     uint64_t &CurrentAddressLocation) {
   // There could be a gap (without corresponding zero padding) between
@@ -1663,7 +1659,7 @@ void XCOFFObjectWriter::writeSectionForDwarfSectionEntry(
   CurrentAddressLocation += TailPaddingSize;
 }
 
-void XCOFFObjectWriter::writeSectionForExceptionSectionEntry(
+void XCOFFWriter::writeSectionForExceptionSectionEntry(
     const MCAssembler &Asm, ExceptionSectionEntry &ExceptionEntry,
     uint64_t &CurrentAddressLocation) {
   for (const auto &TableEntry : ExceptionEntry.ExceptionTable) {
@@ -1685,7 +1681,7 @@ void XCOFFObjectWriter::writeSectionForExceptionSectionEntry(
   CurrentAddressLocation += getExceptionSectionSize();
 }
 
-void XCOFFObjectWriter::writeSectionForCInfoSymSectionEntry(
+void XCOFFWriter::writeSectionForCInfoSymSectionEntry(
     const MCAssembler &Asm, CInfoSymSectionEntry &CInfoSymEntry,
     uint64_t &CurrentAddressLocation) {
   if (!CInfoSymSection.Entry)
@@ -1737,20 +1733,5 @@ uint8_t getEncodedType(const MCSectionXCOFF *Sec) {
 std::unique_ptr<MCObjectWriter>
 llvm::createXCOFFObjectWriter(std::unique_ptr<MCXCOFFObjectTargetWriter> MOTW,
                               raw_pwrite_stream &OS) {
-  return std::make_unique<XCOFFObjectWriter>(std::move(MOTW), OS);
-}
-
-// TODO: Export XCOFFObjectWriter to llvm/MC/MCXCOFFObjectWriter.h and remove
-// the forwarders.
-void XCOFF::addExceptionEntry(MCObjectWriter &Writer, const MCSymbol *Symbol,
-                              const MCSymbol *Trap, unsigned LanguageCode,
-                              unsigned ReasonCode, unsigned FunctionSize,
-                              bool hasDebug) {
-  static_cast<XCOFFObjectWriter &>(Writer).addExceptionEntry(
-      Symbol, Trap, LanguageCode, ReasonCode, FunctionSize, hasDebug);
-}
-
-void XCOFF::addCInfoSymEntry(MCObjectWriter &Writer, StringRef Name,
-                             StringRef Metadata) {
-  static_cast<XCOFFObjectWriter &>(Writer).addCInfoSymEntry(Name, Metadata);
+  return std::make_unique<XCOFFWriter>(std::move(MOTW), OS);
 }
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index f5ce405ab8d9616..a879918005cad8f 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -125,6 +125,7 @@
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/StackColoring.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TailDuplication.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TwoAddressInstructionPass.h"
 #include "llvm/CodeGen/TypePromotion.h"
@@ -1175,9 +1176,17 @@ Expected<std::string> parseMemProfUsePassOptions(StringRef Params) {
   return Result;
 }
 
-Expected<bool> parseStructuralHashPrinterPassOptions(StringRef Params) {
-  return PassBuilder::parseSinglePassOption(Params, "detailed",
-                                            "StructuralHashPrinterPass");
+Expected<StructuralHashOptions>
+parseStructuralHashPrinterPassOptions(StringRef Params) {
+  if (Params.empty())
+    return StructuralHashOptions::None;
+  if (Params == "detailed")
+    return StructuralHashOptions::Detailed;
+  if (Params == "call-target-ignored")
+    return StructuralHashOptions::CallTargetIgnored;
+  return make_error<StringError>(
+      formatv("invalid structural hash printer parameter '{0}' ", Params).str(),
+      inconvertibleErrorCode());
 }
 
 Expected<bool> parseWinEHPrepareOptions(StringRef Params) {
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 17710eb94b6dedb..488554c84c1c437 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -296,7 +296,12 @@ static cl::opt<bool> UseLoopVersioningLICM(
     "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
     cl::desc("Enable the experimental Loop Versioning LICM pass"));
 
+static cl::opt<std::string> InstrumentColdFuncOnlyPath(
+    "instrument-cold-function-only-path", cl::init(""),
+    cl::desc("File path for cold function only instrumentation"), cl::Hidden);
+
 extern cl::opt<std::string> UseCtxProfile;
+extern cl::opt<bool> PGOInstrumentColdFunctionOnly;
 
 namespace llvm {
 extern cl::opt<bool> EnableMemProfContextDisambiguation;
@@ -1182,8 +1187,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   const bool IsCtxProfUse =
       !UseCtxProfile.empty() && Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
 
+  // Enable cold function coverage instrumentation if
+  // InstrumentColdFuncOnlyPath is provided.
+  const bool IsColdFuncOnlyInstrGen = PGOInstrumentColdFunctionOnly =
+      IsPGOPreLink && !InstrumentColdFuncOnlyPath.empty();
+
   if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
-      IsCtxProfUse)
+      IsCtxProfUse || IsColdFuncOnlyInstrGen)
     addPreInlinerPasses(MPM, Level, Phase);
 
   // Add all the requested passes for instrumentation PGO, if requested.
@@ -1205,6 +1215,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
       return MPM;
     addPostPGOLoopRotation(MPM, Level);
     MPM.addPass(PGOCtxProfLoweringPass());
+  } else if (IsColdFuncOnlyInstrGen) {
+    addPGOInstrPasses(
+        MPM, Level, /* RunProfileGen */ true, /* IsCS */ false,
+        /* AtomicCounterUpdate */ false, InstrumentColdFuncOnlyPath,
+        /* ProfileRemappingFile */ "", IntrusiveRefCntPtr<vfs::FileSystem>());
   }
 
   if (IsPGOInstrGen || IsPGOInstrUse || IsCtxProfGen)
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 549c1359b5852ce..017ae311c55eb40 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -220,10 +220,11 @@ MODULE_PASS_WITH_PARAMS(
     parseMSanPassOptions, "recover;kernel;eager-checks;track-origins=N")
 MODULE_PASS_WITH_PARAMS(
     "print<structural-hash>", "StructuralHashPrinterPass",
-    [](bool EnableDetailedStructuralHash) {
-      return StructuralHashPrinterPass(dbgs(), EnableDetailedStructuralHash);
+    [](StructuralHashOptions Options) {
+      return StructuralHashPrinterPass(dbgs(), Options);
     },
-    parseStructuralHashPrinterPassOptions, "detailed")
+    parseStructuralHashPrinterPassOptions, "detailed;call-target-ignored")
+
 #undef MODULE_PASS_WITH_PARAMS
 
 #ifndef CGSCC_ANALYSIS
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 1a3721bf1035033..f09241681b92a5a 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
@@ -184,13 +185,25 @@ class InstrProfRecordWriterTrait {
 InstrProfWriter::InstrProfWriter(
     bool Sparse, uint64_t TemporalProfTraceReservoirSize,
     uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion,
-    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema)
+    memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema,
+    bool MemprofGenerateRandomHotness,
+    unsigned MemprofGenerateRandomHotnessSeed)
     : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
       TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
       InfoObj(new InstrProfRecordWriterTrait()),
       WritePrevVersion(WritePrevVersion),
       MemProfVersionRequested(MemProfVersionRequested),
-      MemProfFullSchema(MemProfFullSchema) {}
+      MemProfFullSchema(MemProfFullSchema),
+      MemprofGenerateRandomHotness(MemprofGenerateRandomHotness) {
+  // Set up the random number seed if requested.
+  if (MemprofGenerateRandomHotness) {
+    unsigned seed = MemprofGenerateRandomHotnessSeed
+                        ? MemprofGenerateRandomHotnessSeed
+                        : std::time(nullptr);
+    errs() << "random hotness seed = " << seed << "\n";
+    std::srand(seed);
+  }
+}
 
 InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
 
@@ -273,13 +286,34 @@ void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash,
 
 void InstrProfWriter::addMemProfRecord(
     const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) {
-  auto [Iter, Inserted] = MemProfData.Records.insert({Id, Record});
+  auto NewRecord = Record;
+  // Provoke random hotness values if requested. We specify the lifetime access
+  // density and lifetime length that will result in a cold or not cold hotness.
+  // See the logic in getAllocType() in Analysis/MemoryProfileInfo.cpp.
+  if (MemprofGenerateRandomHotness) {
+    for (auto &Alloc : NewRecord.AllocSites) {
+      // To get a not cold context, set the lifetime access density to the
+      // maximum value and the lifetime to 0.
+      uint64_t NewTLAD = std::numeric_limits<uint64_t>::max();
+      uint64_t NewTL = 0;
+      bool IsCold = std::rand() % 2;
+      if (IsCold) {
+        // To get a cold context, set the lifetime access density to 0 and the
+        // lifetime to the maximum value.
+        NewTLAD = 0;
+        NewTL = std::numeric_limits<uint64_t>::max();
+      }
+      Alloc.Info.setTotalLifetimeAccessDensity(NewTLAD);
+      Alloc.Info.setTotalLifetime(NewTL);
+    }
+  }
+  auto [Iter, Inserted] = MemProfData.Records.insert({Id, NewRecord});
   // If we inserted a new record then we are done.
   if (Inserted) {
     return;
   }
   memprof::IndexedMemProfRecord &Existing = Iter->second;
-  Existing.merge(Record);
+  Existing.merge(NewRecord);
 }
 
 bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id,
diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp
index 486e935bc35fba8..b86ed5864c1ac1d 100644
--- a/llvm/lib/SandboxIR/Context.cpp
+++ b/llvm/lib/SandboxIR/Context.cpp
@@ -35,17 +35,20 @@ Value *Context::registerValue(std::unique_ptr<Value> &&VPtr) {
   assert(VPtr->getSubclassID() != Value::ClassID::User &&
          "Can't register a user!");
 
+  Value *V = VPtr.get();
+  [[maybe_unused]] auto Pair =
+      LLVMValueToValueMap.insert({VPtr->Val, std::move(VPtr)});
+  assert(Pair.second && "Already exists!");
+
   // Track creation of instructions.
   // Please note that we don't allow the creation of detached instructions,
   // meaning that the instructions need to be inserted into a block upon
   // creation. This is why the tracker class combines creation and insertion.
-  if (auto *I = dyn_cast<Instruction>(VPtr.get()))
+  if (auto *I = dyn_cast<Instruction>(V)) {
     getTracker().emplaceIfTracking<CreateAndInsertInst>(I);
+    runCreateInstrCallbacks(I);
+  }
 
-  Value *V = VPtr.get();
-  [[maybe_unused]] auto Pair =
-      LLVMValueToValueMap.insert({VPtr->Val, std::move(VPtr)});
-  assert(Pair.second && "Already exists!");
   return V;
 }
 
@@ -660,4 +663,64 @@ Module *Context::createModule(llvm::Module *LLVMM) {
   return M;
 }
 
+void Context::runEraseInstrCallbacks(Instruction *I) {
+  for (const auto &CBEntry : EraseInstrCallbacks)
+    CBEntry.second(I);
+}
+
+void Context::runCreateInstrCallbacks(Instruction *I) {
+  for (auto &CBEntry : CreateInstrCallbacks)
+    CBEntry.second(I);
+}
+
+void Context::runMoveInstrCallbacks(Instruction *I, const BBIterator &WhereIt) {
+  for (auto &CBEntry : MoveInstrCallbacks)
+    CBEntry.second(I, WhereIt);
+}
+
+// An arbitrary limit, to check for accidental misuse. We expect a small number
+// of callbacks to be registered at a time, but we can increase this number if
+// we discover we needed more.
+[[maybe_unused]] static constexpr int MaxRegisteredCallbacks = 16;
+
+Context::CallbackID Context::registerEraseInstrCallback(EraseInstrCallback CB) {
+  assert(EraseInstrCallbacks.size() <= MaxRegisteredCallbacks &&
+         "EraseInstrCallbacks size limit exceeded");
+  CallbackID ID = NextCallbackID++;
+  EraseInstrCallbacks[ID] = CB;
+  return ID;
+}
+void Context::unregisterEraseInstrCallback(CallbackID ID) {
+  [[maybe_unused]] bool Erased = EraseInstrCallbacks.erase(ID);
+  assert(Erased &&
+         "Callback ID not found in EraseInstrCallbacks during deregistration");
+}
+
+Context::CallbackID
+Context::registerCreateInstrCallback(CreateInstrCallback CB) {
+  assert(CreateInstrCallbacks.size() <= MaxRegisteredCallbacks &&
+         "CreateInstrCallbacks size limit exceeded");
+  CallbackID ID = NextCallbackID++;
+  CreateInstrCallbacks[ID] = CB;
+  return ID;
+}
+void Context::unregisterCreateInstrCallback(CallbackID ID) {
+  [[maybe_unused]] bool Erased = CreateInstrCallbacks.erase(ID);
+  assert(Erased &&
+         "Callback ID not found in CreateInstrCallbacks during deregistration");
+}
+
+Context::CallbackID Context::registerMoveInstrCallback(MoveInstrCallback CB) {
+  assert(MoveInstrCallbacks.size() <= MaxRegisteredCallbacks &&
+         "MoveInstrCallbacks size limit exceeded");
+  CallbackID ID = NextCallbackID++;
+  MoveInstrCallbacks[ID] = CB;
+  return ID;
+}
+void Context::unregisterMoveInstrCallback(CallbackID ID) {
+  [[maybe_unused]] bool Erased = MoveInstrCallbacks.erase(ID);
+  assert(Erased &&
+         "Callback ID not found in MoveInstrCallbacks during deregistration");
+}
+
 } // namespace llvm::sandboxir
diff --git a/llvm/lib/SandboxIR/Instruction.cpp b/llvm/lib/SandboxIR/Instruction.cpp
index d80d10370e32d8e..096b827541eeafa 100644
--- a/llvm/lib/SandboxIR/Instruction.cpp
+++ b/llvm/lib/SandboxIR/Instruction.cpp
@@ -73,6 +73,8 @@ void Instruction::removeFromParent() {
 
 void Instruction::eraseFromParent() {
   assert(users().empty() && "Still connected to users, can't erase!");
+
+  Ctx.runEraseInstrCallbacks(this);
   std::unique_ptr<Value> Detached = Ctx.detach(this);
   auto LLVMInstrs = getLLVMInstrs();
 
@@ -100,6 +102,7 @@ void Instruction::moveBefore(BasicBlock &BB, const BBIterator &WhereIt) {
     // Destination is same as origin, nothing to do.
     return;
 
+  Ctx.runMoveInstrCallbacks(this, WhereIt);
   Ctx.getTracker().emplaceIfTracking<MoveInstr>(this);
 
   auto *LLVMBB = cast<llvm::BasicBlock>(BB.Val);
diff --git a/llvm/lib/SandboxIR/PassManager.cpp b/llvm/lib/SandboxIR/PassManager.cpp
index 3a1cfa1d367a2aa..aaa49e0f6912b61 100644
--- a/llvm/lib/SandboxIR/PassManager.cpp
+++ b/llvm/lib/SandboxIR/PassManager.cpp
@@ -10,20 +10,20 @@
 
 namespace llvm::sandboxir {
 
-bool FunctionPassManager::runOnFunction(Function &F) {
+bool FunctionPassManager::runOnFunction(Function &F, const Analyses &A) {
   bool Change = false;
   for (auto &Pass : Passes) {
-    Change |= Pass->runOnFunction(F);
+    Change |= Pass->runOnFunction(F, A);
     // TODO: run the verifier.
   }
   // TODO: Check ChangeAll against hashes before/after.
   return Change;
 }
 
-bool RegionPassManager::runOnRegion(Region &R) {
+bool RegionPassManager::runOnRegion(Region &R, const Analyses &A) {
   bool Change = false;
   for (auto &Pass : Passes) {
-    Change |= Pass->runOnRegion(R);
+    Change |= Pass->runOnRegion(R, A);
     // TODO: run the verifier.
   }
   // TODO: Check ChangeAll against hashes before/after.
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 531bdeaca12614f..2ecaea4b02bf618 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -256,6 +256,7 @@ add_llvm_component_library(LLVMSupport
   TimeProfiler.cpp
   Timer.cpp
   ToolOutputFile.cpp
+  TrieRawHashMap.cpp
   Twine.cpp
   TypeSize.cpp
   Unicode.cpp
diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp
index 4bbe41688209621..4f5fcb4857e8056 100644
--- a/llvm/lib/Support/StringRef.cpp
+++ b/llvm/lib/Support/StringRef.cpp
@@ -35,21 +35,23 @@ static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
 }
 
 int StringRef::compare_insensitive(StringRef RHS) const {
-  if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
+  if (int Res =
+          ascii_strncasecmp(data(), RHS.data(), std::min(size(), RHS.size())))
     return Res;
-  if (Length == RHS.Length)
+  if (size() == RHS.size())
     return 0;
-  return Length < RHS.Length ? -1 : 1;
+  return size() < RHS.size() ? -1 : 1;
 }
 
 bool StringRef::starts_with_insensitive(StringRef Prefix) const {
-  return Length >= Prefix.Length &&
-      ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
+  return size() >= Prefix.size() &&
+         ascii_strncasecmp(data(), Prefix.data(), Prefix.size()) == 0;
 }
 
 bool StringRef::ends_with_insensitive(StringRef Suffix) const {
-  return Length >= Suffix.Length &&
-      ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
+  return size() >= Suffix.size() &&
+         ascii_strncasecmp(end() - Suffix.size(), Suffix.data(),
+                           Suffix.size()) == 0;
 }
 
 size_t StringRef::find_insensitive(char C, size_t From) const {
@@ -59,33 +61,33 @@ size_t StringRef::find_insensitive(char C, size_t From) const {
 
 /// compare_numeric - Compare strings, handle embedded numbers.
 int StringRef::compare_numeric(StringRef RHS) const {
-  for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
+  for (size_t I = 0, E = std::min(size(), RHS.size()); I != E; ++I) {
     // Check for sequences of digits.
-    if (isDigit(Data[I]) && isDigit(RHS.Data[I])) {
+    if (isDigit(data()[I]) && isDigit(RHS.data()[I])) {
       // The longer sequence of numbers is considered larger.
       // This doesn't really handle prefixed zeros well.
       size_t J;
       for (J = I + 1; J != E + 1; ++J) {
-        bool ld = J < Length && isDigit(Data[J]);
-        bool rd = J < RHS.Length && isDigit(RHS.Data[J]);
+        bool ld = J < size() && isDigit(data()[J]);
+        bool rd = J < RHS.size() && isDigit(RHS.data()[J]);
         if (ld != rd)
           return rd ? -1 : 1;
         if (!rd)
           break;
       }
       // The two number sequences have the same length (J-I), just memcmp them.
-      if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
+      if (int Res = compareMemory(data() + I, RHS.data() + I, J - I))
         return Res < 0 ? -1 : 1;
       // Identical number sequences, continue search after the numbers.
       I = J - 1;
       continue;
     }
-    if (Data[I] != RHS.Data[I])
-      return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
+    if (data()[I] != RHS.data()[I])
+      return (unsigned char)data()[I] < (unsigned char)RHS.data()[I] ? -1 : 1;
   }
-  if (Length == RHS.Length)
+  if (size() == RHS.size())
     return 0;
-  return Length < RHS.Length ? -1 : 1;
+  return size() < RHS.size() ? -1 : 1;
 }
 
 // Compute the edit distance between the two given strings.
@@ -128,11 +130,11 @@ std::string StringRef::upper() const {
 /// \return - The index of the first occurrence of \arg Str, or npos if not
 /// found.
 size_t StringRef::find(StringRef Str, size_t From) const {
-  if (From > Length)
+  if (From > size())
     return npos;
 
-  const char *Start = Data + From;
-  size_t Size = Length - From;
+  const char *Start = data() + From;
+  size_t Size = size() - From;
 
   const char *Needle = Str.data();
   size_t N = Str.size();
@@ -142,7 +144,7 @@ size_t StringRef::find(StringRef Str, size_t From) const {
     return npos;
   if (N == 1) {
     const char *Ptr = (const char *)::memchr(Start, Needle[0], Size);
-    return Ptr == nullptr ? npos : Ptr - Data;
+    return Ptr == nullptr ? npos : Ptr - data();
   }
 
   const char *Stop = Start + (Size - N + 1);
@@ -153,7 +155,7 @@ size_t StringRef::find(StringRef Str, size_t From) const {
     // good enough.
     do {
       if (std::memcmp(Start, Needle, 2) == 0)
-        return Start - Data;
+        return Start - data();
       ++Start;
     } while (Start < Stop);
     return npos;
@@ -163,7 +165,7 @@ size_t StringRef::find(StringRef Str, size_t From) const {
   if (Size < 16 || N > 255) {
     do {
       if (std::memcmp(Start, Needle, N) == 0)
-        return Start - Data;
+        return Start - data();
       ++Start;
     } while (Start < Stop);
     return npos;
@@ -179,7 +181,7 @@ size_t StringRef::find(StringRef Str, size_t From) const {
     uint8_t Last = Start[N - 1];
     if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1]))
       if (std::memcmp(Start, Needle, N - 1) == 0)
-        return Start - Data;
+        return Start - data();
 
     // Otherwise skip the appropriate number of bytes.
     Start += BadCharSkip[Last];
@@ -200,11 +202,11 @@ size_t StringRef::find_insensitive(StringRef Str, size_t From) const {
 }
 
 size_t StringRef::rfind_insensitive(char C, size_t From) const {
-  From = std::min(From, Length);
+  From = std::min(From, size());
   size_t i = From;
   while (i != 0) {
     --i;
-    if (toLower(Data[i]) == toLower(C))
+    if (toLower(data()[i]) == toLower(C))
       return i;
   }
   return npos;
@@ -220,9 +222,9 @@ size_t StringRef::rfind(StringRef Str) const {
 
 size_t StringRef::rfind_insensitive(StringRef Str) const {
   size_t N = Str.size();
-  if (N > Length)
+  if (N > size())
     return npos;
-  for (size_t i = Length - N + 1, e = 0; i != e;) {
+  for (size_t i = size() - N + 1, e = 0; i != e;) {
     --i;
     if (substr(i, N).equals_insensitive(Str))
       return i;
@@ -240,8 +242,8 @@ StringRef::size_type StringRef::find_first_of(StringRef Chars,
   for (char C : Chars)
     CharBits.set((unsigned char)C);
 
-  for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
-    if (CharBits.test((unsigned char)Data[i]))
+  for (size_type i = std::min(From, size()), e = size(); i != e; ++i)
+    if (CharBits.test((unsigned char)data()[i]))
       return i;
   return npos;
 }
@@ -262,8 +264,8 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
   for (char C : Chars)
     CharBits.set((unsigned char)C);
 
-  for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
-    if (!CharBits.test((unsigned char)Data[i]))
+  for (size_type i = std::min(From, size()), e = size(); i != e; ++i)
+    if (!CharBits.test((unsigned char)data()[i]))
       return i;
   return npos;
 }
@@ -278,8 +280,8 @@ StringRef::size_type StringRef::find_last_of(StringRef Chars,
   for (char C : Chars)
     CharBits.set((unsigned char)C);
 
-  for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
-    if (CharBits.test((unsigned char)Data[i]))
+  for (size_type i = std::min(From, size()) - 1, e = -1; i != e; --i)
+    if (CharBits.test((unsigned char)data()[i]))
       return i;
   return npos;
 }
@@ -287,8 +289,8 @@ StringRef::size_type StringRef::find_last_of(StringRef Chars,
 /// find_last_not_of - Find the last character in the string that is not
 /// \arg C, or npos if not found.
 StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
-  for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
-    if (Data[i] != C)
+  for (size_type i = std::min(From, size()) - 1, e = -1; i != e; --i)
+    if (data()[i] != C)
       return i;
   return npos;
 }
@@ -303,8 +305,8 @@ StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
   for (char C : Chars)
     CharBits.set((unsigned char)C);
 
-  for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
-    if (!CharBits.test((unsigned char)Data[i]))
+  for (size_type i = std::min(From, size()) - 1, e = -1; i != e; --i)
+    if (!CharBits.test((unsigned char)data()[i]))
       return i;
   return npos;
 }
diff --git a/llvm/lib/Support/TrieRawHashMap.cpp b/llvm/lib/Support/TrieRawHashMap.cpp
new file mode 100644
index 000000000000000..11d79a62d011ddf
--- /dev/null
+++ b/llvm/lib/Support/TrieRawHashMap.cpp
@@ -0,0 +1,515 @@
+//===- TrieRawHashMap.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/TrieRawHashMap.h"
+#include "llvm/ADT/LazyAtomicPointer.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/TrieHashIndexGenerator.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ThreadSafeAllocator.h"
+#include "llvm/Support/TrailingObjects.h"
+#include "llvm/Support/raw_ostream.h"
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+struct TrieNode {
+  const bool IsSubtrie = false;
+
+  TrieNode(bool IsSubtrie) : IsSubtrie(IsSubtrie) {}
+
+  static void *operator new(size_t Size) { return ::operator new(Size); }
+  void operator delete(void *Ptr) { ::operator delete(Ptr); }
+};
+
+struct TrieContent final : public TrieNode {
+  const uint8_t ContentOffset;
+  const uint8_t HashSize;
+  const uint8_t HashOffset;
+
+  void *getValuePointer() const {
+    auto *Content = reinterpret_cast<const uint8_t *>(this) + ContentOffset;
+    return const_cast<uint8_t *>(Content);
+  }
+
+  ArrayRef<uint8_t> getHash() const {
+    auto *Begin = reinterpret_cast<const uint8_t *>(this) + HashOffset;
+    return ArrayRef(Begin, Begin + HashSize);
+  }
+
+  TrieContent(size_t ContentOffset, size_t HashSize, size_t HashOffset)
+      : TrieNode(/*IsSubtrie=*/false), ContentOffset(ContentOffset),
+        HashSize(HashSize), HashOffset(HashOffset) {}
+
+  static bool classof(const TrieNode *TN) { return !TN->IsSubtrie; }
+};
+
+static_assert(sizeof(TrieContent) ==
+                  ThreadSafeTrieRawHashMapBase::TrieContentBaseSize,
+              "Check header assumption!");
+
+class TrieSubtrie final
+    : public TrieNode,
+      private TrailingObjects<TrieSubtrie, LazyAtomicPointer<TrieNode>> {
+public:
+  using Slot = LazyAtomicPointer<TrieNode>;
+
+  Slot &get(size_t I) { return getTrailingObjects<Slot>()[I]; }
+  TrieNode *load(size_t I) { return get(I).load(); }
+
+  unsigned size() const { return Size; }
+
+  TrieSubtrie *
+  sink(size_t I, TrieContent &Content, size_t NumSubtrieBits, size_t NewI,
+       function_ref<TrieSubtrie *(std::unique_ptr<TrieSubtrie>)> Saver);
+
+  static std::unique_ptr<TrieSubtrie> create(size_t StartBit, size_t NumBits);
+
+  explicit TrieSubtrie(size_t StartBit, size_t NumBits);
+
+  static bool classof(const TrieNode *TN) { return TN->IsSubtrie; }
+
+  static constexpr size_t sizeToAlloc(unsigned NumBits) {
+    assert(NumBits < 20 && "Tries should have fewer than ~1M slots");
+    unsigned Count = 1u << NumBits;
+    return totalSizeToAlloc<LazyAtomicPointer<TrieNode>>(Count);
+  }
+
+private:
+  // FIXME: Use a bitset to speed up access:
+  //
+  //     std::array<std::atomic<uint64_t>, NumSlots/64> IsSet;
+  //
+  // This will avoid needing to visit sparsely filled slots in
+  // \a ThreadSafeTrieRawHashMapBase::destroyImpl() when there's a non-trivial
+  // destructor.
+  //
+  // It would also greatly speed up iteration, if we add that some day, and
+  // allow get() to return one level sooner.
+  //
+  // This would be the algorithm for updating IsSet (after updating Slots):
+  //
+  //     std::atomic<uint64_t> &Bits = IsSet[I.High];
+  //     const uint64_t NewBit = 1ULL << I.Low;
+  //     uint64_t Old = 0;
+  //     while (!Bits.compare_exchange_weak(Old, Old | NewBit))
+  //       ;
+
+  // For debugging.
+  unsigned StartBit = 0;
+  unsigned NumBits = 0;
+  unsigned Size = 0;
+  friend class llvm::ThreadSafeTrieRawHashMapBase;
+  friend class TrailingObjects;
+
+public:
+  /// Linked list for ownership of tries. The pointer is owned by TrieSubtrie.
+  std::atomic<TrieSubtrie *> Next;
+};
+} // end namespace
+
+std::unique_ptr<TrieSubtrie> TrieSubtrie::create(size_t StartBit,
+                                                 size_t NumBits) {
+  void *Memory = ::operator new(sizeToAlloc(NumBits));
+  TrieSubtrie *S = ::new (Memory) TrieSubtrie(StartBit, NumBits);
+  return std::unique_ptr<TrieSubtrie>(S);
+}
+
+TrieSubtrie::TrieSubtrie(size_t StartBit, size_t NumBits)
+    : TrieNode(true), StartBit(StartBit), NumBits(NumBits), Size(1u << NumBits),
+      Next(nullptr) {
+  for (unsigned I = 0; I < Size; ++I)
+    new (&get(I)) Slot(nullptr);
+
+  static_assert(
+      std::is_trivially_destructible<LazyAtomicPointer<TrieNode>>::value,
+      "Expected no work in destructor for TrieNode");
+}
+
+// Sink the nodes down sub-trie when the object being inserted collides with
+// the index of existing object in the trie. In this case, a new sub-trie needs
+// to be allocated to hold existing object.
+TrieSubtrie *TrieSubtrie::sink(
+    size_t I, TrieContent &Content, size_t NumSubtrieBits, size_t NewI,
+    function_ref<TrieSubtrie *(std::unique_ptr<TrieSubtrie>)> Saver) {
+  // Create a new sub-trie that points to the existing object with the new
+  // index for the next level.
+  assert(NumSubtrieBits > 0);
+  std::unique_ptr<TrieSubtrie> S = create(StartBit + NumBits, NumSubtrieBits);
+
+  assert(NewI < S->size() && "NewI indexes the new sub-trie, not this one");
+  S->get(NewI).store(&Content);
+
+  // Using compare_exchange to atomically add back the new sub-trie to the trie
+  // in the place of the existing object.
+  TrieNode *ExistingNode = &Content;
+  assert(I < Size);
+  if (get(I).compare_exchange_strong(ExistingNode, S.get()))
+    return Saver(std::move(S));
+
+  // Another thread created a subtrie already. Return it and let "S" be
+  // destructed.
+  return cast<TrieSubtrie>(ExistingNode);
+}
+
+class ThreadSafeTrieRawHashMapBase::ImplType final
+    : private TrailingObjects<ThreadSafeTrieRawHashMapBase::ImplType,
+                              TrieSubtrie> {
+public:
+  static std::unique_ptr<ImplType> create(size_t StartBit, size_t NumBits) {
+    size_t Size = sizeof(ImplType) + TrieSubtrie::sizeToAlloc(NumBits);
+    void *Memory = ::operator new(Size);
+    ImplType *Impl = ::new (Memory) ImplType(StartBit, NumBits);
+    return std::unique_ptr<ImplType>(Impl);
+  }
+
+  // Save the Subtrie into the ownership list of the trie structure in a
+  // thread-safe way. The ownership transfer is done by compare_exchange the
+  // pointer value inside the unique_ptr.
+  TrieSubtrie *save(std::unique_ptr<TrieSubtrie> S) {
+    assert(!S->Next && "Expected S to a freshly-constructed leaf");
+
+    TrieSubtrie *CurrentHead = nullptr;
+    // Add ownership of "S" to front of the list, so that Root -> S ->
+    // Root.Next. This works by repeatedly setting S->Next to a candidate value
+    // of Root.Next (initially nullptr), then setting Root.Next to S once the
+    // candidate matches reality.
+    while (!getRoot()->Next.compare_exchange_weak(CurrentHead, S.get()))
+      S->Next.exchange(CurrentHead);
+
+    // Ownership transferred to subtrie successfully. Release the unique_ptr.
+    return S.release();
+  }
+
+  // Get the root which is the trailing object.
+  TrieSubtrie *getRoot() { return getTrailingObjects<TrieSubtrie>(); }
+
+  static void *operator new(size_t Size) { return ::operator new(Size); }
+  void operator delete(void *Ptr) { ::operator delete(Ptr); }
+
+  /// FIXME: This should take a function that allocates and constructs the
+  /// content lazily (taking the hash as a separate parameter), in case of
+  /// collision.
+  ThreadSafeAllocator<BumpPtrAllocator> ContentAlloc;
+
+private:
+  friend class TrailingObjects;
+
+  ImplType(size_t StartBit, size_t NumBits) {
+    ::new (getRoot()) TrieSubtrie(StartBit, NumBits);
+  }
+};
+
+ThreadSafeTrieRawHashMapBase::ImplType &
+ThreadSafeTrieRawHashMapBase::getOrCreateImpl() {
+  if (ImplType *Impl = ImplPtr.load())
+    return *Impl;
+
+  // Create a new ImplType and store it if another thread doesn't do so first.
+  // If another thread wins, this one is destroyed locally by the unique_ptr.
+  std::unique_ptr<ImplType> Impl = ImplType::create(0, NumRootBits);
+  ImplType *ExistingImpl = nullptr;
+
+  // If the ownership transferred successfully, release unique_ptr and return
+  // the pointer to the new ImplType.
+  if (ImplPtr.compare_exchange_strong(ExistingImpl, Impl.get()))
+    return *Impl.release();
+
+  // Already created, return the existing ImplType.
+  return *ExistingImpl;
+}
+
+ThreadSafeTrieRawHashMapBase::PointerBase
+ThreadSafeTrieRawHashMapBase::find(ArrayRef<uint8_t> Hash) const {
+  assert(!Hash.empty() && "Uninitialized hash");
+
+  ImplType *Impl = ImplPtr.load();
+  if (!Impl)
+    return PointerBase();
+
+  TrieSubtrie *S = Impl->getRoot();
+  TrieHashIndexGenerator IndexGen{NumRootBits, NumSubtrieBits, Hash};
+  size_t Index = IndexGen.next();
+  while (Index != IndexGen.end()) {
+    // Read the slot; if it is empty, return the (subtrie, index, bit) position.
+    TrieNode *Existing = S->get(Index);
+    if (!Existing)
+      return PointerBase(S, Index, *IndexGen.StartBit);
+
+    // Content node: return its value on an exact hash match, else the position.
+    if (auto *ExistingContent = dyn_cast<TrieContent>(Existing))
+      return ExistingContent->getHash() == Hash
+                 ? PointerBase(ExistingContent->getValuePointer())
+                 : PointerBase(S, Index, *IndexGen.StartBit);
+
+    Index = IndexGen.next();
+    S = cast<TrieSubtrie>(Existing);
+  }
+  llvm_unreachable("failed to locate the node after consuming all hash bytes");
+}
+
+ThreadSafeTrieRawHashMapBase::PointerBase ThreadSafeTrieRawHashMapBase::insert(
+    PointerBase Hint, ArrayRef<uint8_t> Hash,
+    function_ref<const uint8_t *(void *Mem, ArrayRef<uint8_t> Hash)>
+        Constructor) {
+  assert(!Hash.empty() && "Uninitialized hash");
+
+  ImplType &Impl = getOrCreateImpl();
+  TrieSubtrie *S = Impl.getRoot();
+  TrieHashIndexGenerator IndexGen{NumRootBits, NumSubtrieBits, Hash};
+  size_t Index;
+  if (Hint.isHint()) {
+    S = static_cast<TrieSubtrie *>(Hint.P);
+    Index = IndexGen.hint(Hint.I, Hint.B);
+  } else {
+    Index = IndexGen.next();
+  }
+
+  while (Index != IndexGen.end()) {
+    // Load the node from the slot, allocating and calling the constructor if
+    // the slot is empty.
+    bool Generated = false;
+    TrieNode &Existing = S->get(Index).loadOrGenerate([&]() {
+      Generated = true;
+
+      // Allocate the content and construct the value at ContentOffset.
+      uint8_t *Memory = reinterpret_cast<uint8_t *>(
+          Impl.ContentAlloc.Allocate(ContentAllocSize, ContentAllocAlign));
+      const uint8_t *HashStorage = Constructor(Memory + ContentOffset, Hash);
+
+      // Construct the TrieContent header, passing in the offset to the hash.
+      TrieContent *Content = ::new (Memory)
+          TrieContent(ContentOffset, Hash.size(), HashStorage - Memory);
+      assert(Hash == Content->getHash() && "Hash not properly initialized");
+      return Content;
+    });
+    // If we just generated it, return it!
+    if (Generated)
+      return PointerBase(cast<TrieContent>(Existing).getValuePointer());
+
+    if (auto *ST = dyn_cast<TrieSubtrie>(&Existing)) {
+      S = ST;
+      Index = IndexGen.next();
+      continue;
+    }
+
+    // Return the existing content if it's an exact match!
+    auto &ExistingContent = cast<TrieContent>(Existing);
+    if (ExistingContent.getHash() == Hash)
+      return PointerBase(ExistingContent.getValuePointer());
+
+    // Sink the existing content as long as the indexes match.
+    size_t NextIndex = IndexGen.next();
+    while (NextIndex != IndexGen.end()) {
+      size_t NewIndexForExistingContent =
+          IndexGen.getCollidingBits(ExistingContent.getHash());
+      S = S->sink(Index, ExistingContent, IndexGen.getNumBits(),
+                  NewIndexForExistingContent,
+                  [&Impl](std::unique_ptr<TrieSubtrie> S) {
+                    return Impl.save(std::move(S));
+                  });
+      Index = NextIndex;
+
+      // The two hashes diverge at this level; stop sinking.
+      if (NextIndex != NewIndexForExistingContent)
+        break;
+
+      NextIndex = IndexGen.next();
+    }
+  }
+  llvm_unreachable("failed to insert the node after consuming all hash bytes");
+}
+
+ThreadSafeTrieRawHashMapBase::ThreadSafeTrieRawHashMapBase(
+    size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset,
+    std::optional<size_t> NumRootBits, std::optional<size_t> NumSubtrieBits)
+    : ContentAllocSize(ContentAllocSize), ContentAllocAlign(ContentAllocAlign),
+      ContentOffset(ContentOffset),
+      NumRootBits(NumRootBits ? *NumRootBits : DefaultNumRootBits),
+      NumSubtrieBits(NumSubtrieBits ? *NumSubtrieBits : DefaultNumSubtrieBits),
+      ImplPtr(nullptr) {
+  // Assertion checks for reasonable configuration. The settings below are not
+  // hard limits on most platforms, but a reasonable configuration should fall
+  // within those limits.
+  assert((!NumRootBits || *NumRootBits < 20) &&
+         "Root should have fewer than ~1M slots");
+  assert((!NumSubtrieBits || *NumSubtrieBits < 10) &&
+         "Subtries should have fewer than ~1K slots");
+}
+
+ThreadSafeTrieRawHashMapBase::ThreadSafeTrieRawHashMapBase(
+    ThreadSafeTrieRawHashMapBase &&RHS)
+    : ContentAllocSize(RHS.ContentAllocSize),
+      ContentAllocAlign(RHS.ContentAllocAlign),
+      ContentOffset(RHS.ContentOffset), NumRootBits(RHS.NumRootBits),
+      NumSubtrieBits(RHS.NumSubtrieBits) {
+  // Steal the root from RHS.
+  ImplPtr = RHS.ImplPtr.exchange(nullptr);
+}
+
+ThreadSafeTrieRawHashMapBase::~ThreadSafeTrieRawHashMapBase() {
+  assert(!ImplPtr.load() && "Expected subclass to call destroyImpl()");
+}
+
+void ThreadSafeTrieRawHashMapBase::destroyImpl(
+    function_ref<void(void *)> Destructor) {
+  std::unique_ptr<ImplType> Impl(ImplPtr.exchange(nullptr));
+  if (!Impl)
+    return;
+
+  // Destroy content nodes throughout the trie. Do not destroy any subtries yet
+  // since we need TrieNode::classof() to find the content nodes.
+  //
+  // FIXME: Once we have bitsets (see FIXME in TrieSubtrie class), use them to
+  // facilitate sparse iteration here.
+  if (Destructor)
+    for (TrieSubtrie *Trie = Impl->getRoot(); Trie; Trie = Trie->Next.load())
+      for (unsigned I = 0; I < Trie->size(); ++I)
+        if (auto *Content = dyn_cast_or_null<TrieContent>(Trie->load(I)))
+          Destructor(Content->getValuePointer());
+
+  // Destroy the saved subtries by walking the list that starts at Root.Next.
+  // Incidentally, this destroys them in the reverse order of saving.
+  TrieSubtrie *Trie = Impl->getRoot()->Next;
+  while (Trie) {
+    TrieSubtrie *Next = Trie->Next.exchange(nullptr);
+    delete Trie;
+    Trie = Next;
+  }
+}
+
+ThreadSafeTrieRawHashMapBase::PointerBase
+ThreadSafeTrieRawHashMapBase::getRoot() const {
+  ImplType *Impl = ImplPtr.load();
+  if (!Impl)
+    return PointerBase();
+  return PointerBase(Impl->getRoot());
+}
+
+unsigned ThreadSafeTrieRawHashMapBase::getStartBit(
+    ThreadSafeTrieRawHashMapBase::PointerBase P) const {
+  assert(!P.isHint() && "Not a valid trie");
+  if (!P.P)
+    return 0;
+  if (auto *S = dyn_cast<TrieSubtrie>((TrieNode *)P.P))
+    return S->StartBit;
+  return 0;
+}
+
+unsigned ThreadSafeTrieRawHashMapBase::getNumBits(
+    ThreadSafeTrieRawHashMapBase::PointerBase P) const {
+  assert(!P.isHint() && "Not a valid trie");
+  if (!P.P)
+    return 0;
+  if (auto *S = dyn_cast<TrieSubtrie>((TrieNode *)P.P))
+    return S->NumBits;
+  return 0;
+}
+
+unsigned ThreadSafeTrieRawHashMapBase::getNumSlotUsed(
+    ThreadSafeTrieRawHashMapBase::PointerBase P) const {
+  assert(!P.isHint() && "Not a valid trie");
+  if (!P.P)
+    return 0;
+  auto *S = dyn_cast<TrieSubtrie>((TrieNode *)P.P);
+  if (!S)
+    return 0;
+  unsigned Num = 0;
+  for (unsigned I = 0, E = S->size(); I < E; ++I)
+    if (S->load(I))
+      ++Num;
+  return Num;
+}
+
+std::string ThreadSafeTrieRawHashMapBase::getTriePrefixAsString(
+    ThreadSafeTrieRawHashMapBase::PointerBase P) const {
+  assert(!P.isHint() && "Not a valid trie");
+  if (!P.P)
+    return "";
+
+  auto *S = dyn_cast<TrieSubtrie>((TrieNode *)P.P);
+  if (!S || !S->IsSubtrie)
+    return "";
+
+  // Find a TrieContent node, which stores a hash: descend through the first
+  // used slot of each level until a TrieContent node is found.
+  TrieSubtrie *Current = S;
+  TrieContent *Node = nullptr;
+  while (Current) {
+    TrieSubtrie *Next = nullptr;
+    // Find first used slot in the trie.
+    for (unsigned I = 0, E = Current->size(); I < E; ++I) {
+      auto *S = Current->load(I);
+      if (!S)
+        continue;
+
+      if (auto *Content = dyn_cast<TrieContent>(S))
+        Node = Content;
+      else if (auto *Sub = dyn_cast<TrieSubtrie>(S))
+        Next = Sub;
+      break;
+    }
+
+    // Found the node.
+    if (Node)
+      break;
+
+    // Continue to the next level if the node is not found.
+    Current = Next;
+  }
+
+  assert(Node && "malformed trie, cannot find TrieContent on leaf node");
+  // The prefix for the current trie is the first `StartBit` bits of the hash of
+  // content below it. NOTE(review): `- 1` wraps when StartBit < 7 -- confirm.
+  std::string Str;
+  raw_string_ostream SS(Str);
+
+  unsigned StartFullBytes = (S->StartBit + 1) / 8 - 1;
+  SS << toHex(toStringRef(Node->getHash()).take_front(StartFullBytes),
+              /*LowerCase=*/true);
+
+  // Print the remaining prefix bits (up to `StartBit`) as raw bit values.
+  std::string Bits;
+  for (unsigned I = StartFullBytes * 8, E = S->StartBit; I < E; ++I) {
+    unsigned Index = I / 8;
+    unsigned Offset = 7 - I % 8;
+    Bits.push_back('0' + ((Node->getHash()[Index] >> Offset) & 1));
+  }
+
+  if (!Bits.empty())
+    SS << "[" << Bits << "]";
+
+  return SS.str();
+}
+
+unsigned ThreadSafeTrieRawHashMapBase::getNumTries() const {
+  ImplType *Impl = ImplPtr.load();
+  if (!Impl)
+    return 0;
+  unsigned Num = 0;
+  for (TrieSubtrie *Trie = Impl->getRoot(); Trie; Trie = Trie->Next.load())
+    ++Num;
+  return Num;
+}
+
+ThreadSafeTrieRawHashMapBase::PointerBase
+ThreadSafeTrieRawHashMapBase::getNextTrie(
+    ThreadSafeTrieRawHashMapBase::PointerBase P) const {
+  assert(!P.isHint() && "Not a valid trie");
+  if (!P.P)
+    return PointerBase();
+  auto *S = dyn_cast<TrieSubtrie>((TrieNode *)P.P);
+  if (!S)
+    return PointerBase();
+  if (auto *E = S->Next.load())
+    return PointerBase(E);
+  return PointerBase();
+}
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index 8fe7f69ecf8e593..1e93b2c160ba58e 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -89,7 +89,7 @@ TGLexer::TGLexer(SourceMgr &SM, ArrayRef<std::string> Macros) : SrcMgr(SM) {
   for (StringRef MacroName : Macros) {
     const char *End = lexMacroName(MacroName);
     if (End != MacroName.end())
-      PrintFatalError("Invalid macro name `" + MacroName +
+      PrintFatalError("invalid macro name `" + MacroName +
                       "` specified on command line");
 
     DefinedMacros.insert(MacroName);
@@ -188,7 +188,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
       return LexIdentifier();
 
     // Unknown character, emit an error.
-    return ReturnError(TokStart, "Unexpected character");
+    return ReturnError(TokStart, "unexpected character");
   case EOF:
     // Lex next token, if we just left an include file.
     // Note that leaving an include file means that the next
@@ -231,7 +231,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
         ++CurPtr; // Eat third dot.
         return tgtok::dotdotdot;
       }
-      return ReturnError(TokStart, "Invalid '..' punctuation");
+      return ReturnError(TokStart, "invalid '..' punctuation");
     }
     return tgtok::dot;
 
@@ -255,7 +255,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) {
       if (SkipCComment())
         return tgtok::Error;
     } else // Otherwise, this is an error.
-      return ReturnError(TokStart, "Unexpected character");
+      return ReturnError(TokStart, "unexpected character");
     return LexToken(FileOrLineStart);
   case '-': case '+':
   case '0': case '1': case '2': case '3': case '4': case '5': case '6':
@@ -313,10 +313,10 @@ tgtok::TokKind TGLexer::LexString() {
   while (*CurPtr != '"') {
     // If we hit the end of the buffer, report an error.
     if (*CurPtr == 0 && CurPtr == CurBuf.end())
-      return ReturnError(StrStart, "End of file in string literal");
+      return ReturnError(StrStart, "end of file in string literal");
 
     if (*CurPtr == '\n' || *CurPtr == '\r')
-      return ReturnError(StrStart, "End of line in string literal");
+      return ReturnError(StrStart, "end of line in string literal");
 
     if (*CurPtr != '\\') {
       CurStrVal += *CurPtr++;
@@ -346,7 +346,7 @@ tgtok::TokKind TGLexer::LexString() {
     // If we hit the end of the buffer, report an error.
     case '\0':
       if (CurPtr == CurBuf.end())
-        return ReturnError(StrStart, "End of file in string literal");
+        return ReturnError(StrStart, "end of file in string literal");
       [[fallthrough]];
     default:
       return ReturnError(CurPtr, "invalid escape in string literal");
@@ -359,7 +359,7 @@ tgtok::TokKind TGLexer::LexString() {
 
 tgtok::TokKind TGLexer::LexVarName() {
   if (!isValidIDChar(CurPtr[0], /*First=*/true))
-    return ReturnError(TokStart, "Invalid variable name");
+    return ReturnError(TokStart, "invalid variable name");
 
   // Otherwise, we're ok, consume the rest of the characters.
   const char *VarNameStart = CurPtr++;
@@ -433,7 +433,7 @@ bool TGLexer::LexInclude() {
   tgtok::TokKind Tok = LexToken();
   if (Tok == tgtok::Error) return true;
   if (Tok != tgtok::StrVal) {
-    PrintError(getLoc(), "Expected filename after include");
+    PrintError(getLoc(), "expected filename after include");
     return true;
   }
 
@@ -444,7 +444,7 @@ bool TGLexer::LexInclude() {
   CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
                                     IncludedFile);
   if (!CurBuffer) {
-    PrintError(getLoc(), "Could not find include file '" + Filename + "'");
+    PrintError(getLoc(), "could not find include file '" + Filename + "'");
     return true;
   }
 
@@ -476,7 +476,7 @@ bool TGLexer::SkipCComment() {
     int CurChar = getNextChar();
     switch (CurChar) {
     case EOF:
-      PrintError(TokStart, "Unterminated comment!");
+      PrintError(TokStart, "unterminated comment");
       return true;
     case '*':
       // End of the comment?
@@ -543,7 +543,7 @@ tgtok::TokKind TGLexer::LexNumber() {
 
   // Requires at least one digit.
   if (CurPtr == NumStart)
-    return ReturnError(TokStart, "Invalid number");
+    return ReturnError(TokStart, "invalid number");
 
   errno = 0;
   if (IsMinus)
@@ -552,9 +552,9 @@ tgtok::TokKind TGLexer::LexNumber() {
     CurIntVal = strtoull(NumStart, nullptr, Base);
 
   if (errno == EINVAL)
-    return ReturnError(TokStart, "Invalid number");
+    return ReturnError(TokStart, "invalid number");
   if (errno == ERANGE)
-    return ReturnError(TokStart, "Number out of range");
+    return ReturnError(TokStart, "number out of range");
 
   return Base == 2 ? tgtok::BinaryIntVal : tgtok::IntVal;
 }
@@ -580,13 +580,13 @@ tgtok::TokKind TGLexer::LexBracket() {
     }
   }
 
-  return ReturnError(CodeStart - 2, "Unterminated code block");
+  return ReturnError(CodeStart - 2, "unterminated code block");
 }
 
 /// LexExclaim - Lex '!' and '![a-zA-Z]+'.
 tgtok::TokKind TGLexer::LexExclaim() {
   if (!isAlpha(*CurPtr))
-    return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
+    return ReturnError(CurPtr - 1, "invalid \"!operator\"");
 
   const char *Start = CurPtr++;
   while (isAlpha(*CurPtr))
@@ -648,7 +648,8 @@ tgtok::TokKind TGLexer::LexExclaim() {
           .Case("repr", tgtok::XRepr)
           .Default(tgtok::Error);
 
-  return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
+  return Kind != tgtok::Error ? Kind
+                              : ReturnError(Start - 1, "unknown operator");
 }
 
 bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
@@ -662,17 +663,17 @@ bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) {
 
   // Pop the preprocessing controls from the include stack.
   if (PrepIncludeStack.empty()) {
-    PrintFatalError("Preprocessor include stack is empty");
+    PrintFatalError("preprocessor include stack is empty");
   }
 
   PrepIncludeStack.pop_back();
 
   if (IncludeStackMustBeEmpty) {
     if (!PrepIncludeStack.empty())
-      PrintFatalError("Preprocessor include stack is not empty");
+      PrintFatalError("preprocessor include stack is not empty");
   } else {
     if (PrepIncludeStack.empty())
-      PrintFatalError("Preprocessor include stack is empty");
+      PrintFatalError("preprocessor include stack is empty");
   }
 
   return true;
@@ -732,7 +733,7 @@ bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) {
       return true;
     }
 
-  PrintFatalError("Unsupported preprocessing token in "
+  PrintFatalError("unsupported preprocessing token in "
                   "prepEatPreprocessorDirective()");
   return false;
 }
@@ -748,7 +749,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind,
     StringRef MacroName = prepLexMacroName();
     StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef";
     if (MacroName.empty())
-      return ReturnError(TokStart, "Expected macro name after " + IfTokName);
+      return ReturnError(TokStart, "expected macro name after " + IfTokName);
 
     bool MacroIsDefined = DefinedMacros.count(MacroName) != 0;
 
@@ -763,7 +764,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind,
         {tgtok::Ifdef, MacroIsDefined, SMLoc::getFromPointer(TokStart)});
 
     if (!prepSkipDirectiveEnd())
-      return ReturnError(CurPtr, "Only comments are supported after " +
+      return ReturnError(CurPtr, "only comments are supported after " +
                                      IfTokName + " NAME");
 
     // If we were not processing tokens before this #ifdef,
@@ -794,7 +795,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind,
 
     if (IfdefEntry.Kind != tgtok::Ifdef) {
       PrintError(TokStart, "double #else");
-      return ReturnError(IfdefEntry.SrcPos, "Previous #else is here");
+      return ReturnError(IfdefEntry.SrcPos, "previous #else is here");
     }
 
     // Replace the corresponding #ifdef's control with its negation
@@ -804,7 +805,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind,
         {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)});
 
     if (!prepSkipDirectiveEnd())
-      return ReturnError(CurPtr, "Only comments are supported after #else");
+      return ReturnError(CurPtr, "only comments are supported after #else");
 
     // If we were processing tokens before this #else,
     // we have to start skipping lines until the matching #endif.
@@ -827,12 +828,12 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind,
 
     if (IfdefOrElseEntry.Kind != tgtok::Ifdef &&
         IfdefOrElseEntry.Kind != tgtok::Else) {
-      PrintFatalError("Invalid preprocessor control on the stack");
+      PrintFatalError("invalid preprocessor control on the stack");
       return tgtok::Error;
     }
 
     if (!prepSkipDirectiveEnd())
-      return ReturnError(CurPtr, "Only comments are supported after #endif");
+      return ReturnError(CurPtr, "only comments are supported after #endif");
 
     PrepIncludeStack.back()->pop_back();
 
@@ -847,15 +848,15 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind,
   } else if (Kind == tgtok::Define) {
     StringRef MacroName = prepLexMacroName();
     if (MacroName.empty())
-      return ReturnError(TokStart, "Expected macro name after #define");
+      return ReturnError(TokStart, "expected macro name after #define");
 
     if (!DefinedMacros.insert(MacroName).second)
       PrintWarning(getLoc(),
-                   "Duplicate definition of macro: " + Twine(MacroName));
+                   "duplicate definition of macro: " + Twine(MacroName));
 
     if (!prepSkipDirectiveEnd())
       return ReturnError(CurPtr,
-                         "Only comments are supported after #define NAME");
+                         "only comments are supported after #define NAME");
 
     if (!ReturnNextLiveToken) {
       PrintFatalError("#define must be ignored during the lines skipping");
@@ -865,13 +866,13 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind,
     return LexToken();
   }
 
-  PrintFatalError("Preprocessing directive is not supported");
+  PrintFatalError("preprocessing directive is not supported");
   return tgtok::Error;
 }
 
 bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
   if (!MustNeverBeFalse)
-    PrintFatalError("Invalid recursion.");
+    PrintFatalError("invalid recursion.");
 
   do {
     // Skip all symbols to the line end.
@@ -917,7 +918,7 @@ bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) {
     // due to #else or #endif.
     if (prepIsProcessingEnabled()) {
       if (Kind != tgtok::Else && Kind != tgtok::Endif) {
-        PrintFatalError("Tokens processing was enabled by an unexpected "
+        PrintFatalError("tokens processing was enabled by an unexpected "
                         "preprocessing directive");
         return false;
       }
@@ -1032,7 +1033,7 @@ bool TGLexer::prepSkipDirectiveEnd() {
           return false;
       } else {
         TokStart = CurPtr;
-        PrintError(CurPtr, "Unexpected character");
+        PrintError(CurPtr, "unexpected character");
         return false;
       }
 
@@ -1067,8 +1068,8 @@ void TGLexer::prepReportPreprocessorStackError() {
                     "empty control stack");
 
   auto &PrepControl = PrepIncludeStack.back()->back();
-  PrintError(CurBuf.end(), "Reached EOF without matching #endif");
-  PrintError(PrepControl.SrcPos, "The latest preprocessor control is here");
+  PrintError(CurBuf.end(), "reached EOF without matching #endif");
+  PrintError(PrepControl.SrcPos, "the latest preprocessor control is here");
 
   TokStart = CurPtr;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 9bb508b783c36a0..6854cccaafa1d78 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -73,7 +73,8 @@ def SVEUnsupported : AArch64Unsupported {
                       SVE2Unsupported.F);
 }
 
-let F = [HasSME2p2, HasSVE2p2orSME2p2] in
+let F = [HasSME2p2, HasSVE2p2orSME2p2, HasNonStreamingSVEorSME2p2,
+         HasNonStreamingSVE2p2orSME2p2] in
 def SME2p2Unsupported : AArch64Unsupported;
 
 def SME2p1Unsupported : AArch64Unsupported {
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index a982ea67a0f2792..6d2dd0ecbccf317 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -150,6 +150,12 @@ class AArch64AsmPrinter : public AsmPrinter {
   // Emit the sequence for BRA/BLRA (authenticate + branch/call).
   void emitPtrauthBranch(const MachineInstr *MI);
 
+  void emitPtrauthCheckAuthenticatedValue(Register TestedReg,
+                                          Register ScratchReg,
+                                          AArch64PACKey::ID Key,
+                                          bool ShouldTrap,
+                                          const MCSymbol *OnFailure);
+
   // Emit the sequence for AUT or AUTPAC.
   void emitPtrauthAuthResign(const MachineInstr *MI);
 
@@ -1719,45 +1725,37 @@ unsigned AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc,
   return AArch64::X17;
 }
 
-void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {
-  const bool IsAUTPAC = MI->getOpcode() == AArch64::AUTPAC;
-
-  // We can expand AUT/AUTPAC into 3 possible sequences:
-  // - unchecked:
-  //      autia x16, x0
-  //      pacib x16, x1 ; if AUTPAC
+/// Emits a code sequence to check an authenticated pointer value.
+///
+/// If OnFailure argument is passed, jump there on check failure instead
+/// of proceeding to the next instruction (only if ShouldTrap is false).
+void AArch64AsmPrinter::emitPtrauthCheckAuthenticatedValue(
+    Register TestedReg, Register ScratchReg, AArch64PACKey::ID Key,
+    bool ShouldTrap, const MCSymbol *OnFailure) {
+  // Insert a sequence to check if authentication of TestedReg succeeded,
+  // such as:
   //
   // - checked and clearing:
-  //      mov x17, x0
-  //      movk x17, #disc, lsl #48
-  //      autia x16, x17
+  //      ; x16 is TestedReg, x17 is ScratchReg
   //      mov x17, x16
   //      xpaci x17
   //      cmp x16, x17
   //      b.eq Lsuccess
   //      mov x16, x17
   //      b Lend
-  //     Lsuccess:
-  //      mov x17, x1
-  //      movk x17, #disc, lsl #48
-  //      pacib x16, x17
-  //     Lend:
-  //   Where we only emit the AUT if we started with an AUT.
+  //    Lsuccess:
+  //      ; skipped if authentication failed
+  //    Lend:
+  //      ...
   //
   // - checked and trapping:
-  //      mov x17, x0
-  //      movk x17, #disc, lsl #48
-  //      autia x16, x0
   //      mov x17, x16
   //      xpaci x17
   //      cmp x16, x17
   //      b.eq Lsuccess
   //      brk #<0xc470 + aut key>
-  //     Lsuccess:
-  //      mov x17, x1
-  //      movk x17, #disc, lsl #48
-  //      pacib x16, x17 ; if AUTPAC
-  //   Where the b.eq skips over the trap if the PAC is valid.
+  //    Lsuccess:
+  //      ...
   //
   // This sequence is expensive, but we need more information to be able to
   // do better.
@@ -1770,6 +1768,71 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {
   // Either way, we also don't always know whether TBI is enabled or not for
   // the specific target environment.
 
+  unsigned XPACOpc = getXPACOpcodeForKey(Key);
+
+  MCSymbol *SuccessSym = createTempSymbol("auth_success_");
+
+  //  mov Xscratch, Xtested
+  emitMovXReg(ScratchReg, TestedReg);
+
+  //  xpac(i|d) Xscratch
+  EmitToStreamer(MCInstBuilder(XPACOpc).addReg(ScratchReg).addReg(ScratchReg));
+
+  //  cmp Xtested, Xscratch
+  EmitToStreamer(MCInstBuilder(AArch64::SUBSXrs)
+                     .addReg(AArch64::XZR)
+                     .addReg(TestedReg)
+                     .addReg(ScratchReg)
+                     .addImm(0));
+
+  //  b.eq Lsuccess
+  EmitToStreamer(MCInstBuilder(AArch64::Bcc)
+                     .addImm(AArch64CC::EQ)
+                     .addExpr(MCSymbolRefExpr::create(SuccessSym, OutContext)));
+
+  if (ShouldTrap) {
+    assert(!OnFailure && "Cannot specify OnFailure with ShouldTrap");
+    // Trapping sequences do a 'brk'.
+    //  brk #<0xc470 + aut key>
+    EmitToStreamer(MCInstBuilder(AArch64::BRK).addImm(0xc470 | Key));
+  } else {
+    // Non-trapping checked sequences return the stripped result in TestedReg,
+    // skipping over success-only code (such as re-signing the pointer) if
+    // there is one.
+    // Note that this can introduce an authentication oracle (such as based on
+    // the high bits of the re-signed value).
+
+    // FIXME: Can we simply return the AUT result, already in TestedReg?
+    //  mov Xtested, Xscratch
+    emitMovXReg(TestedReg, ScratchReg);
+
+    if (OnFailure) {
+      //  b Lend
+      EmitToStreamer(
+          MCInstBuilder(AArch64::B)
+              .addExpr(MCSymbolRefExpr::create(OnFailure, OutContext)));
+    }
+  }
+
+  // If the auth check succeeds, we can continue.
+  // Lsuccess:
+  OutStreamer->emitLabel(SuccessSym);
+}
+
+void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {
+  const bool IsAUTPAC = MI->getOpcode() == AArch64::AUTPAC;
+
+  // We expand AUT/AUTPAC into a sequence of the form
+  //
+  //      ; authenticate x16
+  //      ; check pointer in x16
+  //    Lsuccess:
+  //      ; sign x16 (if AUTPAC)
+  //    Lend:   ; if not trapping on failure
+  //
+  // with the checking sequence chosen depending on whether we should check
+  // the pointer and whether we should trap on failure.
+
   // By default, auth/resign sequences check for auth failures.
   bool ShouldCheck = true;
   // In the checked sequence, we only trap if explicitly requested.
@@ -1800,8 +1863,6 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {
   uint64_t AUTDisc = MI->getOperand(1).getImm();
   unsigned AUTAddrDisc = MI->getOperand(2).getReg();
 
-  unsigned XPACOpc = getXPACOpcodeForKey(AUTKey);
-
   // Compute aut discriminator into x17
   assert(isUInt<16>(AUTDisc));
   unsigned AUTDiscReg = emitPtrauthDiscriminator(AUTDisc, AUTAddrDisc);
@@ -1824,59 +1885,12 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) {
 
   MCSymbol *EndSym = nullptr;
 
-  // Checked sequences do an additional strip-and-compare.
   if (ShouldCheck) {
-    MCSymbol *SuccessSym = createTempSymbol("auth_success_");
-
-    // XPAC has tied src/dst: use x17 as a temporary copy.
-    //  mov x17, x16
-    emitMovXReg(AArch64::X17, AArch64::X16);
-
-    //  xpaci x17
-    EmitToStreamer(
-        *OutStreamer,
-        MCInstBuilder(XPACOpc).addReg(AArch64::X17).addReg(AArch64::X17));
-
-    //  cmp x16, x17
-    EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::SUBSXrs)
-                                     .addReg(AArch64::XZR)
-                                     .addReg(AArch64::X16)
-                                     .addReg(AArch64::X17)
-                                     .addImm(0));
-
-    //  b.eq Lsuccess
-    EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::Bcc)
-                                     .addImm(AArch64CC::EQ)
-                                     .addExpr(MCSymbolRefExpr::create(
-                                         SuccessSym, OutContext)));
-
-    if (ShouldTrap) {
-      // Trapping sequences do a 'brk'.
-      //  brk #<0xc470 + aut key>
-      EmitToStreamer(*OutStreamer,
-                     MCInstBuilder(AArch64::BRK).addImm(0xc470 | AUTKey));
-    } else {
-      // Non-trapping checked sequences return the stripped result in x16,
-      // skipping over the PAC if there is one.
-
-      // FIXME: can we simply return the AUT result, already in x16? without..
-      //        ..traps this is usable as an oracle anyway, based on high bits
-      //  mov x17, x16
-      emitMovXReg(AArch64::X16, AArch64::X17);
-
-      if (IsAUTPAC) {
-        EndSym = createTempSymbol("resign_end_");
-
-        //  b Lend
-        EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::B)
-                                         .addExpr(MCSymbolRefExpr::create(
-                                             EndSym, OutContext)));
-      }
-    }
+    if (IsAUTPAC && !ShouldTrap)
+      EndSym = createTempSymbol("resign_end_");
 
-    // If the auth check succeeds, we can continue.
-    // Lsuccess:
-    OutStreamer->emitLabel(SuccessSym);
+    emitPtrauthCheckAuthenticatedValue(AArch64::X16, AArch64::X17, AUTKey,
+                                       ShouldTrap, EndSym);
   }
 
   // We already emitted unchecked and checked-but-non-trapping AUTs.
diff --git a/llvm/lib/Target/AArch64/AArch64FMV.td b/llvm/lib/Target/AArch64/AArch64FMV.td
index 7146b041fe5d150..12d841445b80f75 100644
--- a/llvm/lib/Target/AArch64/AArch64FMV.td
+++ b/llvm/lib/Target/AArch64/AArch64FMV.td
@@ -81,9 +81,6 @@ def : FMVExtension<"sme-i16i64", "FEAT_SME_I64", "+sme,+sme-i16i64,+bf16", 570>;
 def : FMVExtension<"sme2", "FEAT_SME2", "+sme2,+sme,+bf16", 580>;
 def : FMVExtension<"ssbs", "FEAT_SSBS2", "+ssbs", 490>;
 def : FMVExtension<"sve", "FEAT_SVE", "+sve,+fullfp16,+fp-armv8,+neon", 310>;
-def : FMVExtension<"sve-bf16", "FEAT_SVE_BF16", "+sve,+bf16,+fullfp16,+fp-armv8,+neon", 320>;
-def : FMVExtension<"sve-ebf16", "FEAT_SVE_EBF16", "+sve,+bf16,+fullfp16,+fp-armv8,+neon", 330>;
-def : FMVExtension<"sve-i8mm", "FEAT_SVE_I8MM", "+sve,+i8mm,+fullfp16,+fp-armv8,+neon", 340>;
 def : FMVExtension<"sve2", "FEAT_SVE2", "+sve2,+sve,+fullfp16,+fp-armv8,+neon", 370>;
 def : FMVExtension<"sve2-aes", "FEAT_SVE_PMULL128", "+sve2,+sve,+aes,+sve2-aes,+fullfp16,+fp-armv8,+neon", 380>;
 def : FMVExtension<"sve2-bitperm", "FEAT_SVE_BITPERM", "+sve2,+sve,+sve2-bitperm,+fullfp16,+fp-armv8,+neon", 400>;
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index cbf38f2c57a35e9..6c874fcabcc3022 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -19,6 +19,7 @@
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "Utils/AArch64BaseInfo.h"
+#include "Utils/AArch64SMEAttributes.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index bbf2f2677954577..9af6429c5caee0d 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -126,14 +126,15 @@
 // and the SME unit try to access the same area of memory, including if the
 // access is to an area of the stack. To try to alleviate this we attempt to
 // introduce extra padding into the stack frame between FP and GPR accesses,
-// controlled by the StackHazardSize option. Without changing the layout of the
-// stack frame in the diagram above, a stack object of size StackHazardSize is
-// added between GPR and FPR CSRs. Another is added to the stack objects
-// section, and stack objects are sorted so that FPR > Hazard padding slot >
-// GPRs (where possible). Unfortunately some things are not handled well (VLA
-// area, arguments on the stack, object with both GPR and FPR accesses), but if
-// those are controlled by the user then the entire stack frame becomes GPR at
-// the start/end with FPR in the middle, surrounded by Hazard padding.
+// controlled by the aarch64-stack-hazard-size option. Without changing the
+// layout of the stack frame in the diagram above, a stack object of size
+// aarch64-stack-hazard-size is added between GPR and FPR CSRs. Another is added
+// to the stack objects section, and stack objects are sorted so that FPR >
+// Hazard padding slot > GPRs (where possible). Unfortunately some things are
+// not handled well (VLA area, arguments on the stack, objects with both GPR and
+// FPR accesses), but if those are controlled by the user then the entire stack
+// frame becomes GPR at the start/end with FPR in the middle, surrounded by
+// Hazard padding.
 //
 // An example of the prologue:
 //
@@ -211,6 +212,7 @@
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "Utils/AArch64SMEAttributes.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -273,9 +275,6 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
     cl::desc("Emit homogeneous prologue and epilogue for the size "
              "optimization (default = off)"));
 
-// Stack hazard padding size. 0 = disabled.
-static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
-                                         cl::init(0), cl::Hidden);
 // Stack hazard size for analysis remarks. StackHazardSize takes precedence.
 static cl::opt<unsigned>
     StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
@@ -702,7 +701,10 @@ void AArch64FrameLowering::resetCFIToInitialState(
 
   // Flip the RA sign state.
   if (MFI.shouldSignReturnAddress(MF)) {
-    CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+    auto CFIInst = MFI.branchProtectionPAuthLR()
+                       ? MCCFIInstruction::createNegateRAStateWithPC(nullptr)
+                       : MCCFIInstruction::createNegateRAState(nullptr);
+    CFIIndex = MF.addFrameInst(CFIInst);
     BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex);
   }
 
@@ -1614,6 +1616,10 @@ static bool isTargetWindows(const MachineFunction &MF) {
   return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
 }
 
+static unsigned getStackHazardSize(const MachineFunction &MF) {
+  return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
+} // Callers such as determineStackHazardSlot bail out when this returns 0.
+
 // Convenience function to determine whether I is an SVE callee save.
 static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
   switch (I->getOpcode()) {
@@ -2985,6 +2991,7 @@ static void computeCalleeSaveRegisterPairs(
   bool IsWindows = isTargetWindows(MF);
   bool NeedsWinCFI = needsWinCFI(MF);
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  unsigned StackHazardSize = getStackHazardSize(MF);
   MachineFrameInfo &MFI = MF.getFrameInfo();
   CallingConv::ID CC = MF.getFunction().getCallingConv();
   unsigned Count = CSI.size();
@@ -3612,6 +3619,7 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
 // which can be used to determine if any hazard padding is needed.
 void AArch64FrameLowering::determineStackHazardSlot(
     MachineFunction &MF, BitVector &SavedRegs) const {
+  unsigned StackHazardSize = getStackHazardSize(MF);
   if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
       MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex())
     return;
@@ -3802,7 +3810,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   // StackHazardSize if so.
   determineStackHazardSlot(MF, SavedRegs);
   if (AFI->hasStackHazardSlotIndex())
-    CSStackSize += StackHazardSize;
+    CSStackSize += getStackHazardSize(MF);
 
   // Save number of saved regs, so we can easily update CSStackSize later.
   unsigned NumSavedRegs = SavedRegs.count();
@@ -3917,6 +3925,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
     std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
     unsigned &MaxCSFrameIndex) const {
   bool NeedsWinCFI = needsWinCFI(MF);
+  unsigned StackHazardSize = getStackHazardSize(MF);
   // To match the canonical windows frame layout, reverse the list of
   // callee saved registers to get them laid out by PrologEpilogInserter
   // in the right order. (PrologEpilogInserter allocates stack objects top
@@ -5151,6 +5160,7 @@ void AArch64FrameLowering::emitRemarks(
   if (Attrs.hasNonStreamingInterfaceAndBody())
     return;
 
+  unsigned StackHazardSize = getStackHazardSize(MF);
   const uint64_t HazardSize =
       (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
 
diff --git a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
index 82066b48c84b406..8ff59f60968bebf 100644
--- a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
+++ b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -183,6 +183,8 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
     const unsigned MinSize = Size.getKnownMinValue();
     assert((!Size.isScalable() || MinSize >= 128) &&
            "Scalable vector types should have size of at least 128 bits");
+    if (Size.isScalable())
+      return 3;
     if (MinSize <= 16)
       return 0;
     if (MinSize <= 32)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4c0cd1ac3d45126..31a720ed7b5c77b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19,6 +19,7 @@
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "Utils/AArch64BaseInfo.h"
+#include "Utils/AArch64SMEAttributes.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -2535,6 +2536,11 @@ unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode(
     case AArch64ISD::FCMLTz:
       // Compares return either 0 or all-ones
       return VTBits;
+    case AArch64ISD::VASHR: {
+      unsigned Tmp =
+          DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+      return std::min<uint64_t>(Tmp + Op.getConstantOperandVal(1), VTBits);
+    }
   }
 
   return 1;
@@ -10082,9 +10088,9 @@ SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
 // Thus, it's only used for ptrauth references to extern_weak to avoid null
 // checks.
 
-SDValue AArch64TargetLowering::LowerPtrAuthGlobalAddressStatically(
+static SDValue LowerPtrAuthGlobalAddressStatically(
     SDValue TGA, SDLoc DL, EVT VT, AArch64PACKey::ID KeyC,
-    SDValue Discriminator, SDValue AddrDiscriminator, SelectionDAG &DAG) const {
+    SDValue Discriminator, SDValue AddrDiscriminator, SelectionDAG &DAG) {
   const auto *TGN = cast<GlobalAddressSDNode>(TGA.getNode());
   assert(TGN->getGlobal()->hasExternalWeakLinkage());
 
@@ -27574,6 +27580,22 @@ AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
   return TargetLowering::getSafeStackPointerLocation(IRB);
 }
 
+/// Returns the physical register that receives the exception address on
+/// entry to an EH pad. Always X0 here; \p PersonalityFn is not consulted.
+Register AArch64TargetLowering::getExceptionPointerRegister(
+    const Constant *PersonalityFn) const {
+  // FIXME: This is a guess. Has this been defined yet?
+  return AArch64::X0;
+}
+
+/// Returns the physical register that receives the exception typeid on
+/// entry to a landing pad. Always X1 here; \p PersonalityFn is not consulted.
+Register AArch64TargetLowering::getExceptionSelectorRegister(
+    const Constant *PersonalityFn) const {
+  // FIXME: This is a guess. Has this been defined yet?
+  return AArch64::X1;
+}
+
 bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
     const Instruction &AndI) const {
   // Only sink 'and' mask to cmp use block if it is masking a single bit, since
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 160cd18ca53b32c..d696355bb062a89 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -14,8 +14,6 @@
 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
 
-#include "AArch64.h"
-#include "Utils/AArch64SMEAttributes.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/SelectionDAG.h"
@@ -830,18 +828,12 @@ class AArch64TargetLowering : public TargetLowering {
   /// If a physical register, this returns the register that receives the
   /// exception address on entry to an EH pad.
   Register
-  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
-    // FIXME: This is a guess. Has this been defined yet?
-    return AArch64::X0;
-  }
+  getExceptionPointerRegister(const Constant *PersonalityFn) const override;
 
   /// If a physical register, this returns the register that receives the
   /// exception typeid on entry to a landing pad.
   Register
-  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
-    // FIXME: This is a guess. Has this been defined yet?
-    return AArch64::X1;
-  }
+  getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
 
   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
 
@@ -1132,11 +1124,6 @@ class AArch64TargetLowering : public TargetLowering {
                                  SelectionDAG &DAG) const;
   SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT,
-                                              AArch64PACKey::ID Key,
-                                              SDValue Discriminator,
-                                              SDValue AddrDiscriminator,
-                                              SelectionDAG &DAG) const;
   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 9dd417314fbb865..837d737b28588c4 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -5234,6 +5234,32 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
   }
 }
 
+multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm> {
+  // SDr: double-precision (FPR64) to a 32-bit value held in FPR32
+  def SDr :  BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
+                                     []> { // empty pattern list
+    let Inst{31} = 0; // 32-bit FPR flag
+  }
+
+  // SHr: half-precision (FPR16) to a 32-bit value held in FPR32
+  def SHr :  BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR32, asm,
+                                     []> { // empty pattern list
+    let Inst{31} = 0; // 32-bit FPR flag
+  }
+
+  // DHr: half-precision (FPR16) to a 64-bit value held in FPR64
+  def DHr :  BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR64, asm,
+                                     []> { // empty pattern list
+    let Inst{31} = 1; // 64-bit FPR flag
+  }
+
+  // DSr: single-precision (FPR32) to a 64-bit value held in FPR64
+  def DSr :  BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, FPR64, asm,
+                                     []> { // empty pattern list
+    let Inst{31} = 1; // 64-bit FPR flag
+  }
+}
+
 multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
                              SDPatternOperator OpN> {
   // Scaled half-precision to 32-bit
@@ -5295,7 +5321,7 @@ multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
 //---
 
 let mayStore = 0, mayLoad = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in
-class BaseIntegerToFP<bit isUnsigned,
+class BaseIntegerToFP<bits<2> rmode, bits<3> opcode,
                       RegisterClass srcType, RegisterClass dstType,
                       Operand immType, string asm, list<dag> pattern>
     : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
@@ -5305,15 +5331,16 @@ class BaseIntegerToFP<bit isUnsigned,
   bits<5> Rn;
   bits<6> scale;
   let Inst{30-24} = 0b0011110;
-  let Inst{21-17} = 0b00001;
-  let Inst{16}    = isUnsigned;
+  let Inst{21}    = 0b0;
+  let Inst{20-19} = rmode;
+  let Inst{18-16} = opcode;
   let Inst{15-10} = scale;
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Rd;
 }
 
 let mayRaiseFPException = 1, Uses = [FPCR] in
-class BaseIntegerToFPUnscaled<bit isUnsigned,
+class BaseIntegerToFPUnscaled<bits<2> rmode, bits<3> opcode,
                       RegisterClass srcType, RegisterClass dstType,
                       ValueType dvt, string asm, SDPatternOperator node>
     : I<(outs dstType:$Rd), (ins srcType:$Rn),
@@ -5323,49 +5350,50 @@ class BaseIntegerToFPUnscaled<bit isUnsigned,
   bits<5> Rn;
   bits<6> scale;
   let Inst{30-24} = 0b0011110;
-  let Inst{21-17} = 0b10001;
-  let Inst{16}    = isUnsigned;
+  let Inst{21}    = 0b1;
+  let Inst{20-19} = rmode;
+  let Inst{18-16} = opcode;
   let Inst{15-10} = 0b000000;
   let Inst{9-5}   = Rn;
   let Inst{4-0}   = Rd;
 }
 
-multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
+multiclass IntegerToFP<bits<2> rmode, bits<3> opcode, string asm, SDPatternOperator node> {
   // Unscaled
-  def UWHri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR16, f16, asm, node> {
+  def UWHri: BaseIntegerToFPUnscaled<rmode, opcode, GPR32, FPR16, f16, asm, node> {
     let Inst{31} = 0; // 32-bit GPR flag
     let Inst{23-22} = 0b11; // 16-bit FPR flag
     let Predicates = [HasFullFP16];
   }
 
-  def UWSri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR32, f32, asm, node> {
+  def UWSri: BaseIntegerToFPUnscaled<rmode, opcode, GPR32, FPR32, f32, asm, node> {
     let Inst{31} = 0; // 32-bit GPR flag
     let Inst{23-22} = 0b00; // 32-bit FPR flag
   }
 
-  def UWDri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR64, f64, asm, node> {
+  def UWDri: BaseIntegerToFPUnscaled<rmode, opcode, GPR32, FPR64, f64, asm, node> {
     let Inst{31} = 0; // 32-bit GPR flag
     let Inst{23-22} = 0b01; // 64-bit FPR flag
   }
 
-  def UXHri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR16, f16, asm, node> {
+  def UXHri: BaseIntegerToFPUnscaled<rmode, opcode, GPR64, FPR16, f16, asm, node> {
     let Inst{31} = 1; // 64-bit GPR flag
     let Inst{23-22} = 0b11; // 16-bit FPR flag
     let Predicates = [HasFullFP16];
   }
 
-  def UXSri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR32, f32, asm, node> {
+  def UXSri: BaseIntegerToFPUnscaled<rmode, opcode, GPR64, FPR32, f32, asm, node> {
     let Inst{31} = 1; // 64-bit GPR flag
     let Inst{23-22} = 0b00; // 32-bit FPR flag
   }
 
-  def UXDri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR64, f64, asm, node> {
+  def UXDri: BaseIntegerToFPUnscaled<rmode, opcode, GPR64, FPR64, f64, asm, node> {
     let Inst{31} = 1; // 64-bit GPR flag
     let Inst{23-22} = 0b01; // 64-bit FPR flag
   }
 
   // Scaled
-  def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_recip_f16_i32, asm,
+  def SWHri: BaseIntegerToFP<rmode, opcode, GPR32, FPR16, fixedpoint_recip_f16_i32, asm,
                              [(set (f16 FPR16:$Rd),
                                    (fmul (node GPR32:$Rn),
                                          fixedpoint_recip_f16_i32:$scale))]> {
@@ -5375,7 +5403,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
     let Predicates = [HasFullFP16];
   }
 
-  def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_recip_f32_i32, asm,
+  def SWSri: BaseIntegerToFP<rmode, opcode, GPR32, FPR32, fixedpoint_recip_f32_i32, asm,
                              [(set FPR32:$Rd,
                                    (fmul (node GPR32:$Rn),
                                          fixedpoint_recip_f32_i32:$scale))]> {
@@ -5384,7 +5412,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
     let scale{5} = 1;
   }
 
-  def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_recip_f64_i32, asm,
+  def SWDri: BaseIntegerToFP<rmode, opcode, GPR32, FPR64, fixedpoint_recip_f64_i32, asm,
                              [(set FPR64:$Rd,
                                    (fmul (node GPR32:$Rn),
                                          fixedpoint_recip_f64_i32:$scale))]> {
@@ -5393,7 +5421,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
     let scale{5} = 1;
   }
 
-  def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_recip_f16_i64, asm,
+  def SXHri: BaseIntegerToFP<rmode, opcode, GPR64, FPR16, fixedpoint_recip_f16_i64, asm,
                              [(set (f16 FPR16:$Rd),
                                    (fmul (node GPR64:$Rn),
                                          fixedpoint_recip_f16_i64:$scale))]> {
@@ -5402,7 +5430,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
     let Predicates = [HasFullFP16];
   }
 
-  def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_recip_f32_i64, asm,
+  def SXSri: BaseIntegerToFP<rmode, opcode, GPR64, FPR32, fixedpoint_recip_f32_i64, asm,
                              [(set FPR32:$Rd,
                                    (fmul (node GPR64:$Rn),
                                          fixedpoint_recip_f32_i64:$scale))]> {
@@ -5410,7 +5438,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
     let Inst{23-22} = 0b00; // 32-bit FPR flag
   }
 
-  def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_recip_f64_i64, asm,
+  def SXDri: BaseIntegerToFP<rmode, opcode, GPR64, FPR64, fixedpoint_recip_f64_i64, asm,
                              [(set FPR64:$Rd,
                                    (fmul (node GPR64:$Rn),
                                          fixedpoint_recip_f64_i64:$scale))]> {
@@ -5419,6 +5447,32 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
   }
 }
 
+multiclass IntegerToFPSIMDScalar<bits<2> rmode, bits<3> opcode, string asm, SDPatternOperator node = null_frag> {
+  // HSr: 32-bit source in FPR32 to half-precision (f16) result in FPR16
+  def HSr: BaseIntegerToFPUnscaled<rmode, opcode, FPR32, FPR16, f16, asm, node> {
+    let Inst{31} = 0; // 32-bit FPR flag (source width)
+    let Inst{23-22} = 0b11; // 16-bit FPR flag (result type)
+  }
+
+  // DSr: 32-bit source in FPR32 to double-precision (f64) result in FPR64
+  def DSr: BaseIntegerToFPUnscaled<rmode, opcode, FPR32, FPR64, f64, asm, node> {
+    let Inst{31} = 0; // 32-bit FPR flag (source width)
+    let Inst{23-22} = 0b01; // 64-bit FPR flag (result type)
+  }
+
+  // HDr: 64-bit source in FPR64 to half-precision (f16) result in FPR16
+  def HDr: BaseIntegerToFPUnscaled<rmode, opcode, FPR64, FPR16, f16, asm, node> {
+    let Inst{31} = 1; // 64-bit FPR flag (source width)
+    let Inst{23-22} = 0b11; // 16-bit FPR flag (result type)
+  }
+
+  // SDr: 64-bit source in FPR64 to single-precision (f32) result in FPR32
+  def SDr: BaseIntegerToFPUnscaled<rmode, opcode, FPR64, FPR32, f32, asm, node> {
+    let Inst{31} = 1; // 64-bit FPR flag (source width)
+    let Inst{23-22} = 0b00; // 32-bit FPR flag (result type)
+  }
+}
+
 //---
 // Unscaled integer <-> floating point conversion (i.e. FMOV)
 //---
@@ -13126,3 +13180,20 @@ multiclass AtomicFPStore<bit R, bits<3> op0, string asm> {
   def S : BaseAtomicFPStore<FPR32, 0b10, R, op0, asm>;
   def H : BaseAtomicFPStore<FPR16, 0b01, R, op0, asm>;
 }
+
+class BaseSIMDThreeSameVectorFP8MatrixMul<string asm, bits<2> size, string kind>
+  : BaseSIMDThreeSameVectorTied<1, 1, {size, 0}, 0b11101,
+                                V128, asm, ".16b", []> { // empty pattern list
+  let AsmString = !strconcat(asm, "{\t$Rd", kind, ", $Rn", ".16b",
+                                    ", $Rm", ".16b", "}");
+} // AsmString override: $Rd is suffixed with kind, $Rn and $Rm stay ".16b".
+// One def per $Rd arrangement (.8h and .4s), each with its own predicate list.
+multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{
+    def v8f16: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b00, ".8h">{
+      let Predicates = [HasNEON, HasF8F16MM]; // gates the ".8h" form
+    }
+    def v4f32: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b10, ".4s">{
+      let Predicates = [HasNEON, HasF8F32MM]; // gates the ".4s" form
+    }
+}
+
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index fe3c8578b52aa49..457e918728ae27a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -244,7 +244,7 @@ def HasSVEorSME
     : Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                 "sve or sme">;
-def HasSVEorSME2p2
+def HasNonStreamingSVEorSME2p2
     : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE()) ||"
                 "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2p2),
@@ -281,6 +281,11 @@ def HasSMEF16F16orSMEF8F16
     : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">,
                 AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16),
                 "sme-f16f16 or sme-f8f16">;
+def HasNonStreamingSVE2p2orSME2p2
+    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p2()) ||"
+                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">,
+                AssemblerPredicateWithAll<(any_of FeatureSVE2p2, FeatureSME2p2),
+                "sme2p2 or sve2p2">;
 
 // A subset of NEON instructions are legal in Streaming SVE execution mode,
 // so don't need the additional check for 'isNeonAvailable'.
@@ -4838,6 +4843,19 @@ defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
 defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
 defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
 
+let Predicates = [HasNEON, HasFPRCVT] in{
+  defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">;
+  defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">;
+  defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">;
+  defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">;
+  defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">;
+  defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">;
+  defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">;
+  defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">;
+  defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
+  defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
+} // Predicates = [HasNEON, HasFPRCVT]
+
 // AArch64's FCVT instructions saturate when out of range.
 multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
   let Predicates = [HasFullFP16] in {
@@ -4996,8 +5014,13 @@ def : Pat<(i64 (any_llround f64:$Rn)),
 // Scaled integer to floating point conversion instructions.
 //===----------------------------------------------------------------------===//
 
-defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
-defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
+defm SCVTF : IntegerToFP<0b00, 0b010, "scvtf", any_sint_to_fp>;
+defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;
+
+let Predicates = [HasNEON, HasFPRCVT] in {
+  defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf">;
+  defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf">;
+} // Predicates = [HasNEON, HasFPRCVT]
 
 def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
           (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
@@ -10547,6 +10570,9 @@ let Predicates = [HasLSFE] in {
   def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
 }
 
+let Uses = [FPMR, FPCR] in
+defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
+
 include "AArch64InstrAtomics.td"
 include "AArch64SVEInstrInfo.td"
 include "AArch64SMEInstrInfo.td"
diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
index 92ab4b5c3d251f3..2e5688cf60027ab 100644
--- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp
@@ -71,6 +71,18 @@ FunctionPass *llvm::createAArch64PointerAuthPass() {
 
 char AArch64PointerAuth::ID = 0;
 
+static void emitPACSymOffsetIntoX16(const TargetInstrInfo &TII,
+                                    MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I, DebugLoc DL,
+                                    MCSymbol *PACSym) {
+  BuildMI(MBB, I, DL, TII.get(AArch64::ADRP), AArch64::X16)
+      .addSym(PACSym, AArch64II::MO_PAGE); // ADRP: X16 = page of PACSym
+  BuildMI(MBB, I, DL, TII.get(AArch64::ADDXri), AArch64::X16)
+      .addReg(AArch64::X16) // ADD: X16 = X16 + page offset of PACSym
+      .addSym(PACSym, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
+      .addImm(0); // NOTE(review): presumably the ADDXri shift field - confirm
+}
+
 // Where PAuthLR support is not known at compile time, it is supported using
 // PACM. PACM is in the hint space so has no effect when PAuthLR is not
 // supported by the hardware, but will alter the behaviour of PACI*SP, AUTI*SP
@@ -81,12 +93,10 @@ static void BuildPACM(const AArch64Subtarget &Subtarget, MachineBasicBlock &MBB,
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   auto &MFnI = *MBB.getParent()->getInfo<AArch64FunctionInfo>();
 
-  // ADR X16,<address_of_PACIASP>
+  // Offset to PAC*SP using ADRP + ADD.
   if (PACSym) {
     assert(Flags == MachineInstr::FrameDestroy);
-    BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADR))
-        .addReg(AArch64::X16, RegState::Define)
-        .addSym(PACSym);
+    emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
   }
 
   // Only emit PACM if -mbranch-protection has +pc and the target does not
@@ -95,12 +105,31 @@ static void BuildPACM(const AArch64Subtarget &Subtarget, MachineBasicBlock &MBB,
     BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACM)).setMIFlag(Flags);
 }
 
+static void emitPACCFI(const AArch64Subtarget &Subtarget,
+                       MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                       DebugLoc DL, MachineInstr::MIFlag Flags, bool EmitCFI) {
+  if (!EmitCFI)
+    return;
+  // CFI was requested: fetch the instruction info and per-function state.
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  auto &MF = *MBB.getParent();
+  auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
+  // PAuthLR branch protection selects the "with PC" negate-RA-state variant.
+  auto CFIInst = MFnI.branchProtectionPAuthLR()
+                     ? MCCFIInstruction::createNegateRAStateWithPC(nullptr)
+                     : MCCFIInstruction::createNegateRAState(nullptr);
+  // Record the CFI on the function, then emit a CFI_INSTRUCTION at MBBI.
+  unsigned CFIIndex = MF.addFrameInst(CFIInst);
+  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+      .addCFIIndex(CFIIndex)
+      .setMIFlags(Flags);
+}
+
 void AArch64PointerAuth::signLR(MachineFunction &MF,
                                 MachineBasicBlock::iterator MBBI) const {
   auto &MFnI = *MF.getInfo<AArch64FunctionInfo>();
   bool UseBKey = MFnI.shouldSignWithBKey();
   bool EmitCFI = MFnI.needsDwarfUnwindInfo(MF);
-  bool EmitAsyncCFI = MFnI.needsAsyncDwarfUnwindInfo(MF);
   bool NeedsWinCFI = MF.hasWinCFI();
 
   MachineBasicBlock &MBB = *MBBI->getParent();
@@ -128,6 +157,7 @@ void AArch64PointerAuth::signLR(MachineFunction &MF,
                                                : AArch64::PACIASPPC))
         .setMIFlag(MachineInstr::FrameSetup)
         ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel());
+    emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI);
   } else {
     BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup);
     BuildMI(MBB, MBBI, DL,
@@ -135,27 +165,10 @@ void AArch64PointerAuth::signLR(MachineFunction &MF,
                                                : AArch64::PACIASP))
         .setMIFlag(MachineInstr::FrameSetup)
         ->setPreInstrSymbol(MF, MFnI.getSigningInstrLabel());
+    emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameSetup, EmitCFI);
   }
 
-  if (EmitCFI) {
-    if (!EmitAsyncCFI) {
-      // Reduce the size of the generated call frame information for synchronous
-      // CFI by bundling the new CFI instruction with others in the prolog, so
-      // that no additional DW_CFA_advance_loc is needed.
-      for (auto I = MBBI; I != MBB.end(); ++I) {
-        if (I->getOpcode() == TargetOpcode::CFI_INSTRUCTION &&
-            I->getFlag(MachineInstr::FrameSetup)) {
-          MBBI = I;
-          break;
-        }
-      }
-    }
-    unsigned CFIIndex =
-        MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
-    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-        .addCFIIndex(CFIIndex)
-        .setMIFlags(MachineInstr::FrameSetup);
-  } else if (NeedsWinCFI) {
+  if (!EmitCFI && NeedsWinCFI) {
     BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
         .setMIFlag(MachineInstr::FrameSetup);
   }
@@ -190,6 +203,7 @@ void AArch64PointerAuth::authenticateLR(
       !MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
     if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
       assert(PACSym && "No PAC instruction to refer to");
+      emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
       BuildMI(MBB, TI, DL,
               TII->get(UseBKey ? AArch64::RETABSPPCi : AArch64::RETAASPPCi))
           .addSym(PACSym)
@@ -205,24 +219,22 @@ void AArch64PointerAuth::authenticateLR(
   } else {
     if (MFnI->branchProtectionPAuthLR() && Subtarget->hasPAuthLR()) {
       assert(PACSym && "No PAC instruction to refer to");
+      emitPACSymOffsetIntoX16(*TII, MBB, MBBI, DL, PACSym);
       BuildMI(MBB, MBBI, DL,
               TII->get(UseBKey ? AArch64::AUTIBSPPCi : AArch64::AUTIASPPCi))
           .addSym(PACSym)
           .setMIFlag(MachineInstr::FrameDestroy);
+      emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy,
+                 EmitAsyncCFI);
     } else {
       BuildPACM(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy, PACSym);
       BuildMI(MBB, MBBI, DL,
               TII->get(UseBKey ? AArch64::AUTIBSP : AArch64::AUTIASP))
           .setMIFlag(MachineInstr::FrameDestroy);
+      emitPACCFI(*Subtarget, MBB, MBBI, DL, MachineInstr::FrameDestroy,
+                 EmitAsyncCFI);
     }
 
-    if (EmitAsyncCFI) {
-      unsigned CFIIndex =
-          MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
-      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlags(MachineInstr::FrameDestroy);
-    }
     if (NeedsWinCFI) {
       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PACSignLR))
           .setMIFlag(MachineInstr::FrameDestroy);
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 6044b5bb7d81511..b7165294288946d 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -1000,10 +1000,23 @@ defm FMOPA_MPPZZ_BtoS : sme_outer_product_fp32<0b0, 0b01, ZPR8, "fmopa", null_fr
 
 } //[HasSMEF8F32]
 
+let Predicates = [HasSME2, HasSVEBFSCALE] in {
+  defm BFSCALE : sme2_bfscale_single<"bfscale">;
+  defm BFSCALE : sme2_bfscale_multi<"bfscale">;
+}
+
 let Predicates = [HasSME2p2] in {
   def FTMOPA_M2ZZZI_HtoS  : sme_tmopa_32b<0b11000, ZZ_h_mul_r, ZPR16, "ftmopa">;
   def FTMOPA_M2ZZZI_StoS  : sme_tmopa_32b<0b00000, ZZ_s_mul_r, ZPR32, "ftmopa">;
   def BFTMOPA_M2ZZZI_HtoS : sme_tmopa_32b<0b10000, ZZ_h_mul_r, ZPR16, "bftmopa">;
+
+  defm BFMOP4A : sme2_bfmop4as_widening<0, "bfmop4a">;
+  defm BFMOP4S : sme2_bfmop4as_widening<1, "bfmop4s">;
+
+  defm FMUL_2ZZ  : sme2_multi2_fmul_sm<"fmul">;
+  defm FMUL_2Z2Z : sme2_multi2_fmul_mm< "fmul">;
+  defm FMUL_4ZZ  : sme2_multi4_fmul_sm<"fmul">;
+  defm FMUL_4Z4Z : sme2_multi4_fmul_mm< "fmul">;
 } // [HasSME2p2]
 
 let Predicates = [HasSME2p2, HasSMEB16B16] in {
@@ -1020,4 +1033,17 @@ let Predicates = [HasSME2p2, HasSMEF8F16], Uses = [FPMR, FPCR] in {
 
 let Predicates = [HasSME2p2, HasSMEF16F16] in {
   def FTMOPA_M2ZZZI_HtoH : sme_tmopa_16b<0b10001, ZZ_h_mul_r, ZPR16, "ftmopa">;
+  defm FMOP4A : sme2_fmop4as_fp16_non_widening<0, "fmop4a">;
+  defm FMOP4S : sme2_fmop4as_fp16_non_widening<1, "fmop4s">;
 } // [HasSME2p2, HasSMEF16F16]
+
+let Predicates = [HasSME2, HasSVEBFSCALE] in {
+  defm BFMUL : sme2_bfmul_single<"bfmul">;
+  defm BFMUL : sme2_bfmul_multi<"bfmul">;
+} //[HasSME2, HasSVEBFSCALE]
+
+let Uses = [FPMR, FPCR] in {
+let Predicates = [HasSME2p2, HasSMEF8F32] in {
+  defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a">;
+}
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 59859cb7442d59d..2564ddc5f2e5ca8 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -647,22 +647,22 @@ let Predicates = [HasSVEorSME] in {
   defm SDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b0, "sdot", int_aarch64_sve_sdot_lane>;
   defm UDOT_ZZZI : sve_intx_dot_by_indexed_elem<0b1, "udot", int_aarch64_sve_udot_lane>;
 
-  defm SXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b000, "sxtb", AArch64sxt_mt>;
-  defm UXTB_ZPmZ : sve_int_un_pred_arit_0_h<0b001, "uxtb", AArch64uxt_mt>;
-  defm SXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b010, "sxth", AArch64sxt_mt>;
-  defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", AArch64uxt_mt>;
-  defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", AArch64sxt_mt>;
-  defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", AArch64uxt_mt>;
-  defm ABS_ZPmZ  : sve_int_un_pred_arit_0<  0b110, "abs",  AArch64abs_mt>;
-  defm NEG_ZPmZ  : sve_int_un_pred_arit_0<  0b111, "neg",  AArch64neg_mt>;
-
-  defm CLS_ZPmZ  : sve_int_un_pred_arit_1<   0b000, "cls",  AArch64cls_mt>;
-  defm CLZ_ZPmZ  : sve_int_un_pred_arit_1<   0b001, "clz",  AArch64clz_mt>;
-  defm CNT_ZPmZ  : sve_int_un_pred_arit_1<   0b010, "cnt",  AArch64cnt_mt>;
-  defm CNOT_ZPmZ : sve_int_un_pred_arit_1<   0b011, "cnot", AArch64cnot_mt>;
-  defm NOT_ZPmZ  : sve_int_un_pred_arit_1<   0b110, "not",  AArch64not_mt>;
-  defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", AArch64fabs_mt>;
-  defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", AArch64fneg_mt>;
+  defm SXTB_ZPmZ : sve_int_un_pred_arit_h<0b000, "sxtb", AArch64sxt_mt>;
+  defm UXTB_ZPmZ : sve_int_un_pred_arit_h<0b001, "uxtb", AArch64uxt_mt>;
+  defm SXTH_ZPmZ : sve_int_un_pred_arit_w<0b010, "sxth", AArch64sxt_mt>;
+  defm UXTH_ZPmZ : sve_int_un_pred_arit_w<0b011, "uxth", AArch64uxt_mt>;
+  defm SXTW_ZPmZ : sve_int_un_pred_arit_d<0b100, "sxtw", AArch64sxt_mt>;
+  defm UXTW_ZPmZ : sve_int_un_pred_arit_d<0b101, "uxtw", AArch64uxt_mt>;
+  defm ABS_ZPmZ  : sve_int_un_pred_arit<  0b110, "abs",  AArch64abs_mt>;
+  defm NEG_ZPmZ  : sve_int_un_pred_arit<  0b111, "neg",  AArch64neg_mt>;
+
+  defm CLS_ZPmZ  : sve_int_un_pred_arit_bitwise<   0b000, "cls",  AArch64cls_mt>;
+  defm CLZ_ZPmZ  : sve_int_un_pred_arit_bitwise<   0b001, "clz",  AArch64clz_mt>;
+  defm CNT_ZPmZ  : sve_int_un_pred_arit_bitwise<   0b010, "cnt",  AArch64cnt_mt>;
+  defm CNOT_ZPmZ : sve_int_un_pred_arit_bitwise<   0b011, "cnot", AArch64cnot_mt>;
+  defm NOT_ZPmZ  : sve_int_un_pred_arit_bitwise<   0b110, "not",  AArch64not_mt>;
+  defm FABS_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b100, "fabs", AArch64fabs_mt>;
+  defm FNEG_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b101, "fneg", AArch64fneg_mt>;
 
   foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
     // No dedicated instruction, so just clear the sign bit.
@@ -928,9 +928,10 @@ let Predicates = [HasSVEorSME] in {
   defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>;
 } // End HasSVEorSME
 
-let Predicates = [HasSVE] in {
-  defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>;
-} // End HasSVE
+// COMPACT - word and doubleword
+let Predicates = [HasNonStreamingSVEorSME2p2] in {
+  defm COMPACT_ZPZ : sve_int_perm_compact_sd<"compact", int_aarch64_sve_compact>;
+}
 
 let Predicates = [HasSVEorSME] in {
   defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>;
@@ -2128,7 +2129,7 @@ let Predicates = [HasSVEorSME] in {
   defm CNTH_XPiI : sve_int_count<0b010, "cnth", int_aarch64_sve_cnth>;
   defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>;
   defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>;
-  defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp", int_aarch64_sve_cntp>;
+  defm CNTP_XPP : sve_int_pcount_pred<0b000, "cntp", int_aarch64_sve_cntp>;
 
   def : Pat<(i64 (AArch64CttzElts nxv16i1:$Op1)),
             (CNTP_XPP_B (BRKB_PPzP (PTRUE_B 31), PPR:$Op1),
@@ -2427,7 +2428,7 @@ let Predicates = [HasBF16, HasSVEorSME] in {
 } // End HasBF16, HasSVEorSME
 
 let Predicates = [HasBF16, HasSVE] in {
-  defm BFMMLA_ZZZ   : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>;
+  defm BFMMLA_ZZZ   : sve_fp_matrix_mla<0b01, "bfmmla", ZPR32, ZPR16, int_aarch64_sve_bfmmla, nxv4f32, nxv8bf16>;
 } // End HasBF16, HasSVE
 
 let Predicates = [HasBF16, HasSVEorSME] in {
@@ -3449,11 +3450,15 @@ let Predicates = [HasSVEorSME, HasMatMulInt8] in {
 } // End HasSVEorSME, HasMatMulInt8
 
 let Predicates = [HasSVE, HasMatMulFP32] in {
-  defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0, "fmmla", ZPR32, int_aarch64_sve_fmmla, nxv4f32>;
+  defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0b10, "fmmla", ZPR32, ZPR32, int_aarch64_sve_fmmla, nxv4f32, nxv4f32>;
 } // End HasSVE, HasMatMulFP32
 
+let Predicates = [HasSVE_F16F32MM] in {
+  def FMLLA_ZZZ_HtoS : sve_fp_matrix_mla<0b00, "fmmla", ZPR32, ZPR16>;
+} // End HasSVE_F16F32MM
+
 let Predicates = [HasSVE, HasMatMulFP64] in {
-  defm FMMLA_ZZZ_D : sve_fp_matrix_mla<1, "fmmla", ZPR64, int_aarch64_sve_fmmla, nxv2f64>;
+  defm FMMLA_ZZZ_D : sve_fp_matrix_mla<0b11, "fmmla", ZPR64, ZPR64, int_aarch64_sve_fmmla, nxv2f64, nxv2f64>;
   defm LD1RO_B_IMM : sve_mem_ldor_si<0b00, "ld1rob", Z_b, ZPR8,  nxv16i8, nxv16i1, AArch64ld1ro_z>;
   defm LD1RO_H_IMM : sve_mem_ldor_si<0b01, "ld1roh", Z_h, ZPR16, nxv8i16, nxv8i1,  AArch64ld1ro_z>;
   defm LD1RO_W_IMM : sve_mem_ldor_si<0b10, "ld1row", Z_s, ZPR32, nxv4i32, nxv4i1,  AArch64ld1ro_z>;
@@ -3586,10 +3591,10 @@ let Predicates = [HasSVE2orSME] in {
   defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp", int_aarch64_sve_uminp>;
 
   // SVE2 integer unary operations (predicated)
-  defm URECPE_ZPmZ  : sve2_int_un_pred_arit_s<0b000, "urecpe",  int_aarch64_sve_urecpe>;
-  defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b001, "ursqrte", int_aarch64_sve_ursqrte>;
-  defm SQABS_ZPmZ   : sve2_int_un_pred_arit<0b100,   "sqabs",   int_aarch64_sve_sqabs>;
-  defm SQNEG_ZPmZ   : sve2_int_un_pred_arit<0b101,   "sqneg",   int_aarch64_sve_sqneg>;
+  defm URECPE_ZPmZ  : sve2_int_un_pred_arit_s<0b00, "urecpe",  int_aarch64_sve_urecpe>;
+  defm URSQRTE_ZPmZ : sve2_int_un_pred_arit_s<0b01, "ursqrte", int_aarch64_sve_ursqrte>;
+  defm SQABS_ZPmZ   : sve2_int_un_pred_arit<  0b10, "sqabs",   int_aarch64_sve_sqabs>;
+  defm SQNEG_ZPmZ   : sve2_int_un_pred_arit<  0b11, "sqneg",   int_aarch64_sve_sqneg>;
 
   // SVE2 saturating add/subtract
   defm SQADD_ZPmZ  : sve2_int_arith_pred<0b110000, "sqadd",  int_aarch64_sve_sqadd>;
@@ -3928,6 +3933,10 @@ let Predicates = [HasSVEAES2, HasSVE2p1orSSVE_AES] in {
   def AESD_4ZZI_B    : sve_crypto_binary_multi4<0b0100, "aesd">;
   def AESEMC_4ZZI_B  : sve_crypto_binary_multi4<0b1000, "aesemc">;
   def AESDMIC_4ZZI_B : sve_crypto_binary_multi4<0b1100, "aesdimc">;
+
+  // SVE_AES2 multi-vector polynomial multiply
+  def PMLAL_2ZZZ_Q : sve_crypto_pmlal_multi<"pmlal">;
+  def PMULL_2ZZZ_Q : sve_crypto_pmull_multi<"pmull">;
 } // End HasSVEAES2, HasSVE2p1orSSVE_AES
 
 //===----------------------------------------------------------------------===//
@@ -4217,15 +4226,37 @@ defm TBLQ_ZZZ  : sve2p1_tblq<"tblq", int_aarch64_sve_tblq>;
 // SME2.2 or SVE2.2 instructions
 //===----------------------------------------------------------------------===//
 let Predicates = [HasSVE2p2orSME2p2] in {
+  // SVE Floating-point convert precision, zeroing predicate
+  defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt">;
+
   // SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
   defm FCVTNT_ZPzZ      : sve_fp_fcvtntz<"fcvtnt">;
   def FCVTXNT_ZPzZ_DtoS : sve_fp_fcvt2z<0b0010, "fcvtxnt", ZPR32, ZPR64>;
+  // Placing even
+  def FCVTX_ZPzZ_DtoS   : sve_fp_z2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32>;
 
   // SVE2p2 floating-point convert precision up, zeroing predicate
   defm FCVTLT_ZPzZ      : sve_fp_fcvtltz<"fcvtlt">;
 
   // SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
   def BFCVTNT_ZPzZ      : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>;
+  // Placing corresponding
+  def BFCVT_ZPzZ_StoH   : sve_fp_z2op_p_zd<0b1001010, "bfcvt", ZPR32, ZPR16>;
+
+  // Floating-point convert to integer, zeroing predicate
+  defm FCVTZS_ZPzZ : sve_fp_z2op_p_zd_d<0b0, "fcvtzs">;
+  defm FCVTZU_ZPzZ : sve_fp_z2op_p_zd_d<0b1, "fcvtzu">;
+  // Integer convert to floating-point, zeroing predicate
+  defm SCVTF_ZPzZ  : sve_fp_z2op_p_zd_c<0b0, "scvtf">;
+  defm UCVTF_ZPzZ  : sve_fp_z2op_p_zd_c<0b1, "ucvtf">;
+  // Signed integer base 2 logarithm of fp value, zeroing predicate
+  defm FLOGB_ZPzZ : sve_fp_z2op_p_zd_d_flogb<"flogb">;
+
+  // SVE2 integer unary operations, zeroing predicate
+  def URECPE_ZPzZ  : sve2_int_un_pred_arit_z<0b10, 0b00, "urecpe", ZPR32>;
+  def URSQRTE_ZPzZ : sve2_int_un_pred_arit_z<0b10, 0b01, "ursqrte", ZPR32>;
+  defm SQABS_ZPzZ  : sve2_int_un_pred_arit_z<0b10, "sqabs">;
+  defm SQNEG_ZPzZ  : sve2_int_un_pred_arit_z<0b11, "sqneg">;
 
   // Floating point round to integral fp value in integer size range
   // Merging
@@ -4252,8 +4283,42 @@ let Predicates = [HasSVE2p2orSME2p2] in {
   // Floating-point square root, zeroing predicate
   defm FSQRT_ZPZz  : sve_fp_z2op_p_zd_hsd<0b01101, "fsqrt">;
 
+  // SVE2p2 integer unary arithmetic (bitwise), zeroing predicate
+  defm CLS_ZPzZ  : sve_int_un_pred_arit_bitwise_z<0b000, "cls">;
+  defm CLZ_ZPzZ  : sve_int_un_pred_arit_bitwise_z<0b001, "clz">;
+  defm CNT_ZPzZ  : sve_int_un_pred_arit_bitwise_z<0b010, "cnt">;
+  defm CNOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b011, "cnot">;
+  defm NOT_ZPzZ  : sve_int_un_pred_arit_bitwise_z<0b110, "not">;
+
+  // floating point
+  defm FABS_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b100, "fabs">;
+  defm FNEG_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b101, "fneg">;
+
+  // SVE2p2 integer unary arithmetic, zeroing predicate
+  defm SXTB_ZPzZ  : sve_int_un_pred_arit_h_z<0b000, "sxtb">;
+  defm UXTB_ZPzZ  : sve_int_un_pred_arit_h_z<0b001, "uxtb">;
+  defm SXTH_ZPzZ  : sve_int_un_pred_arit_w_z<0b010, "sxth">;
+  defm UXTH_ZPzZ  : sve_int_un_pred_arit_w_z<0b011, "uxth">;
+  defm ABS_ZPzZ   : sve_int_un_pred_arit_z<  0b110, "abs">;
+  defm NEG_ZPzZ   : sve_int_un_pred_arit_z<  0b111, "neg">;
+  def SXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1000, "sxtw", ZPR64>;
+  def UXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1010, "uxtw", ZPR64>;
+
+  // SVE predicate count
+  defm FIRSTP_XPP : sve_int_pcount_pred_tmp<0b001, "firstp">;
+  defm LASTP_XPP  : sve_int_pcount_pred_tmp<0b010, "lastp">;
 } // End HasSME2p2orSVE2p2
 
+//===----------------------------------------------------------------------===//
+// SME2.2 or SVE2.2 instructions - Legal in streaming mode iff target has SME2p2
+//===----------------------------------------------------------------------===//
+let Predicates = [HasNonStreamingSVE2p2orSME2p2] in {
+  // SVE2 EXPAND
+  defm EXPAND_ZPZ : sve2_int_perm_expand<"expand">;
+  // SVE COMPACT - byte and halfword
+  defm COMPACT_ZPZ : sve_int_perm_compact_bh<"compact">;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE2 FP8 instructions
 //===----------------------------------------------------------------------===//
@@ -4302,6 +4367,14 @@ def FMLALLTB_ZZZ : sve2_fp8_mla<0b010, ZPR32, "fmlalltb">;
 def FMLALLTT_ZZZ : sve2_fp8_mla<0b011, ZPR32, "fmlalltt">;
 } // End HasSSVE_FP8FMA
 
+let Predicates = [HasSVE2, HasF8F32MM] in {
+  def FMMLA_ZZZ_BtoS :  sve2_fp8_mmla<0b0, ZPR32, "fmmla">;
+}
+
+let Predicates = [HasSVE2, HasF8F16MM] in {
+  def FMMLA_ZZZ_BtoH :  sve2_fp8_mmla<0b1, ZPR16, "fmmla">;
+}
+
 let Predicates = [HasSSVE_FP8DOT2] in {
 // FP8 Widening Dot-Product - Indexed Group
 defm FDOT_ZZZI_BtoH : sve2_fp8_dot_indexed_h<"fdot">;
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 19ef6f4fb32e747..525538db8036c2c 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -11,6 +11,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64TargetMachine.h"
+#include "Utils/AArch64SMEAttributes.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "aarch64-selectiondag-info"
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 32db1e8c2477a85..7fb2a961e0313d3 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -76,6 +76,16 @@ static cl::opt<unsigned> AArch64MinimumJumpTableEntries(
     "aarch64-min-jump-table-entries", cl::init(13), cl::Hidden,
     cl::desc("Set minimum number of entries to use a jump table on AArch64"));
 
+static cl::opt<unsigned> AArch64StreamingHazardSize(
+    "aarch64-streaming-hazard-size",
+    cl::desc("Hazard size for streaming mode memory accesses. 0 = disabled."),
+    cl::init(0), cl::Hidden);
+
+static cl::alias AArch64StreamingStackHazardSize(
+    "aarch64-stack-hazard-size",
+    cl::desc("alias for -aarch64-streaming-hazard-size"),
+    cl::aliasopt(AArch64StreamingHazardSize));
+
 unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
   if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
     return OverrideVectorInsertExtractBaseCost;
@@ -333,6 +343,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
       CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
       IsLittle(LittleEndian), IsStreaming(IsStreaming),
       IsStreamingCompatible(IsStreamingCompatible),
+      StreamingHazardSize(AArch64StreamingHazardSize),
       MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
       MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
       InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 9856415361e50d7..50adb7cbf69a872 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -84,6 +84,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
 
   bool IsStreaming;
   bool IsStreamingCompatible;
+  unsigned StreamingHazardSize;
   unsigned MinSVEVectorSizeInBits;
   unsigned MaxSVEVectorSizeInBits;
   unsigned VScaleForTuning = 2;
@@ -172,6 +173,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   /// Returns true if the function has a streaming-compatible body.
   bool isStreamingCompatible() const { return IsStreamingCompatible; }
 
+  /// Returns the size of memory region that if accessed by both the CPU and
+  /// the SME unit could result in a hazard. 0 = disabled.
+  unsigned getStreamingHazardSize() const { return StreamingHazardSize; }
+
   /// Returns true if the target has NEON and the function at runtime is known
   /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
   /// mode, which disables NEON instructions).
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ff3c69f7e10c660..71f9bbbbc350415 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -10,6 +10,7 @@
 #include "AArch64ExpandImm.h"
 #include "AArch64PerfectShuffle.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64SMEAttributes.h"
 #include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index dfc5e04110cf57b..5a487be5723ce92 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -195,6 +195,7 @@ class AArch64AsmParser : public MCTargetAsmParser {
   bool parseDirectiveReq(StringRef Name, SMLoc L);
   bool parseDirectiveUnreq(SMLoc L);
   bool parseDirectiveCFINegateRAState();
+  bool parseDirectiveCFINegateRAStateWithPC();
   bool parseDirectiveCFIBKeyFrame();
   bool parseDirectiveCFIMTETaggedFrame();
 
@@ -6261,6 +6262,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
   case Match_InvalidMatrixTileVectorV128:
     return Error(Loc,
                  "invalid matrix operand, expected za[0-15]h.q or za[0-15]v.q");
+  case Match_InvalidMatrixTile16:
+    return Error(Loc, "invalid matrix operand, expected za[0-1].h");
   case Match_InvalidMatrixTile32:
     return Error(Loc, "invalid matrix operand, expected za[0-3].s");
   case Match_InvalidMatrixTile64:
@@ -6881,6 +6884,7 @@ bool AArch64AsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   case Match_InvalidSVEExactFPImmOperandHalfOne:
   case Match_InvalidSVEExactFPImmOperandHalfTwo:
   case Match_InvalidSVEExactFPImmOperandZeroOne:
+  case Match_InvalidMatrixTile16:
   case Match_InvalidMatrixTile32:
   case Match_InvalidMatrixTile64:
   case Match_InvalidMatrix:
@@ -6975,6 +6979,8 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
     parseDirectiveInst(Loc);
   else if (IDVal == ".cfi_negate_ra_state")
     parseDirectiveCFINegateRAState();
+  else if (IDVal == ".cfi_negate_ra_state_with_pc")
+    parseDirectiveCFINegateRAStateWithPC();
   else if (IDVal == ".cfi_b_key_frame")
     parseDirectiveCFIBKeyFrame();
   else if (IDVal == ".cfi_mte_tagged_frame")
@@ -7425,6 +7431,13 @@ bool AArch64AsmParser::parseDirectiveCFINegateRAState() {
   return false;
 }
 
+bool AArch64AsmParser::parseDirectiveCFINegateRAStateWithPC() {
+  if (parseEOL())
+    return true;
+  getStreamer().emitCFINegateRAStateWithPC();
+  return false;
+}
+
 /// parseDirectiveCFIBKeyFrame
 /// ::= .cfi_b_key
 bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 5aee7804de3e3fd..065858c42894471 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -18,6 +18,7 @@
 #include "AArch64MachineFunctionInfo.h"
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
+#include "Utils/AArch64SMEAttributes.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/ObjCARCUtil.h"
@@ -393,8 +394,8 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
       // i1 is a special case because SDAG i1 true is naturally zero extended
       // when widened using ANYEXT. We need to do it explicitly here.
       auto &Flags = CurArgInfo.Flags[0];
-      if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() &&
-          !Flags.isZExt()) {
+      if (MRI.getType(CurVReg).getSizeInBits() == TypeSize::getFixed(1) &&
+          !Flags.isSExt() && !Flags.isZExt()) {
         CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
       } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
                  1) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index df0c09d32c074ac..afea08ab0925011 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -615,6 +615,7 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
   unsigned RegBankID = RB.getID();
 
   if (RegBankID == AArch64::GPRRegBankID) {
+    assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
     if (SizeInBits <= 32)
       return GetAllRegSet ? &AArch64::GPR32allRegClass
                           : &AArch64::GPR32RegClass;
@@ -626,6 +627,12 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
   }
 
   if (RegBankID == AArch64::FPRRegBankID) {
+    if (SizeInBits.isScalable()) {
+      assert(SizeInBits == TypeSize::getScalable(128) &&
+             "Unexpected scalable register size");
+      return &AArch64::ZPRRegClass;
+    }
+
     switch (SizeInBits) {
     default:
       return nullptr;
@@ -964,7 +971,8 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
   // then we can pull it into the helpers that get the appropriate class for a
   // register bank. Or make a new helper that carries along some constraint
   // information.
-  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
+  if (SrcRegBank != DstRegBank &&
+      (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
     SrcSize = DstSize = TypeSize::getFixed(32);
 
   return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 5cd1fea75025cd1..6024027afaf6ce9 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -91,6 +91,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   const bool HasCSSC = ST.hasCSSC();
   const bool HasRCPC3 = ST.hasRCPC3();
+  const bool HasSVE = ST.hasSVE();
 
   getActionDefinitionsBuilder(
       {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
@@ -127,7 +128,34 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .clampNumElements(0, v2s64, v2s64)
       .moreElementsToNextPow2(0);
 
-  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
+  getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
+      .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
+      .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, s32, s64)
+      .clampMaxNumElements(0, s8, 16)
+      .clampMaxNumElements(0, s16, 8)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64)
+      .minScalarOrEltIf(
+          [=](const LegalityQuery &Query) {
+            return Query.Types[0].getNumElements() <= 2;
+          },
+          0, s32)
+      .minScalarOrEltIf(
+          [=](const LegalityQuery &Query) {
+            return Query.Types[0].getNumElements() <= 4;
+          },
+          0, s16)
+      .minScalarOrEltIf(
+          [=](const LegalityQuery &Query) {
+            return Query.Types[0].getNumElements() <= 16;
+          },
+          0, s8)
+      .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+      .moreElementsToNextPow2(0);
+
+  getActionDefinitionsBuilder(G_MUL)
       .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
       .widenScalarToNextPow2(0)
       .clampScalar(0, s32, s64)
@@ -1508,6 +1536,14 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
 
 bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                              MachineInstr &MI) const {
+  auto LowerBinOp = [&MI](unsigned Opcode) {
+    MachineIRBuilder MIB(MI);
+    MIB.buildInstr(Opcode, {MI.getOperand(0)},
+                   {MI.getOperand(2), MI.getOperand(3)});
+    MI.eraseFromParent();
+    return true;
+  };
+
   Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
   switch (IntrinsicID) {
   case Intrinsic::vacopy: {
@@ -1647,37 +1683,25 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
     return true;
   }
   case Intrinsic::aarch64_neon_smax:
+    return LowerBinOp(TargetOpcode::G_SMAX);
   case Intrinsic::aarch64_neon_smin:
+    return LowerBinOp(TargetOpcode::G_SMIN);
   case Intrinsic::aarch64_neon_umax:
+    return LowerBinOp(TargetOpcode::G_UMAX);
   case Intrinsic::aarch64_neon_umin:
+    return LowerBinOp(TargetOpcode::G_UMIN);
   case Intrinsic::aarch64_neon_fmax:
+    return LowerBinOp(TargetOpcode::G_FMAXIMUM);
   case Intrinsic::aarch64_neon_fmin:
+    return LowerBinOp(TargetOpcode::G_FMINIMUM);
   case Intrinsic::aarch64_neon_fmaxnm:
-  case Intrinsic::aarch64_neon_fminnm: {
-    MachineIRBuilder MIB(MI);
-    if (IntrinsicID == Intrinsic::aarch64_neon_smax)
-      MIB.buildSMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
-    else if (IntrinsicID == Intrinsic::aarch64_neon_smin)
-      MIB.buildSMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
-    else if (IntrinsicID == Intrinsic::aarch64_neon_umax)
-      MIB.buildUMax(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
-    else if (IntrinsicID == Intrinsic::aarch64_neon_umin)
-      MIB.buildUMin(MI.getOperand(0), MI.getOperand(2), MI.getOperand(3));
-    else if (IntrinsicID == Intrinsic::aarch64_neon_fmax)
-      MIB.buildInstr(TargetOpcode::G_FMAXIMUM, {MI.getOperand(0)},
-                     {MI.getOperand(2), MI.getOperand(3)});
-    else if (IntrinsicID == Intrinsic::aarch64_neon_fmin)
-      MIB.buildInstr(TargetOpcode::G_FMINIMUM, {MI.getOperand(0)},
-                     {MI.getOperand(2), MI.getOperand(3)});
-    else if (IntrinsicID == Intrinsic::aarch64_neon_fmaxnm)
-      MIB.buildInstr(TargetOpcode::G_FMAXNUM, {MI.getOperand(0)},
-                     {MI.getOperand(2), MI.getOperand(3)});
-    else if (IntrinsicID == Intrinsic::aarch64_neon_fminnm)
-      MIB.buildInstr(TargetOpcode::G_FMINNUM, {MI.getOperand(0)},
-                     {MI.getOperand(2), MI.getOperand(3)});
-    MI.eraseFromParent();
-    return true;
-  }
+    return LowerBinOp(TargetOpcode::G_FMAXNUM);
+  case Intrinsic::aarch64_neon_fminnm:
+    return LowerBinOp(TargetOpcode::G_FMINNUM);
+  case Intrinsic::aarch64_neon_smull:
+    return LowerBinOp(AArch64::G_SMULL);
+  case Intrinsic::aarch64_neon_umull:
+    return LowerBinOp(AArch64::G_UMULL);
   case Intrinsic::vector_reverse:
     // TODO: Add support for vector_reverse
     return false;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 762a7af8c3ddb3d..2ee2ee5a6fa500f 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -59,12 +59,12 @@ bool AArch64InstPrinter::applyTargetSpecificCLOption(StringRef Opt) {
   return false;
 }
 
-void AArch64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void AArch64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   markup(OS, Markup::Register) << getRegisterName(Reg);
 }
 
 void AArch64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg,
-                                      unsigned AltIdx) const {
+                                      unsigned AltIdx) {
   markup(OS, Markup::Register) << getRegisterName(Reg, AltIdx);
 }
 
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index e7b62b3203681bc..9cf2674ae943aa7 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -29,8 +29,8 @@ class AArch64InstPrinter : public MCInstPrinter {
 
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
-  void printRegName(raw_ostream &OS, MCRegister Reg, unsigned AltIdx) const;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
+  void printRegName(raw_ostream &OS, MCRegister Reg, unsigned AltIdx);
 
   // Autogenerated by tblgen.
   std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 08929ed5616b2c8..e7c90b0ed14e063 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -5188,3 +5188,232 @@ class sme2_luti4_vector_vg4_strided<bits<2> sz, bits<2> op, string mnemonic>
   let Inst{3-2}   = 0b00;
   let Inst{1-0}   = Zd{1-0};
 }
+
+multiclass sme2_bfscale_single<string mnemonic> {
+  def _2ZZ : sme2_sve_destructive_vector_vg2_single<0b00, 0b0011000, ZZ_h_mul_r, ZPR4b16, mnemonic>;
+  def _4ZZ : sme2_sve_destructive_vector_vg4_single<0b00, 0b0011000, ZZZZ_h_mul_r, ZPR4b16, mnemonic>;
+}
+
+multiclass sme2_bfscale_multi<string mnemonic> {
+  def _2Z2Z : sme2_sve_destructive_vector_vg2_multi<0b00, 0b0011000, ZZ_h_mul_r, mnemonic>;
+  def _4Z4Z : sme2_sve_destructive_vector_vg4_multi<0b00, 0b0011000, ZZZZ_h_mul_r, mnemonic>;
+}
+
+class sme2_bf16_fp32_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
+    : I<(outs TileOp32:$ZAda),
+        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
+        mnemonic, "\t$ZAda, $Zn, $Zm",
+        "", []>, Sched<[]> {
+  bits<2> ZAda;
+  bits<3> Zn;
+  bits<3> Zm;
+
+  let Inst{31-21} = 0b10000001000;
+  let Inst{20} = M;
+  let Inst{19-17} = Zm;
+  let Inst{16-10} = 0b0000000;
+  let Inst{9} = N;
+  let Inst{8-6} = Zn;
+  let Inst{5} = 0;
+  let Inst{4} = S;
+  let Inst{3-2} = 0b00;
+  let Inst{1-0} = ZAda;
+
+  let Constraints = "$ZAda = $_ZAda";
+}
+
+multiclass sme2_bfmop4as_widening<bit S, string mnemonic> {
+  // Single vectors
+  def _MZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;
+
+  // Multiple and single vectors
+  def _M2ZZ_S : sme2_bf16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;
+
+  // Single and multiple vectors
+  def _MZ2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;
+
+  // Multiple vectors
+  def _M2Z2Z_S : sme2_bf16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
+}
+
+class sme2_multi2_fmul_sm<bits<2> size, string mnemonic, RegisterOperand vector_ty, RegisterOperand zpr_ty>
+    : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, zpr_ty:$Zm),
+        mnemonic, "\t$Zd, $Zn, $Zm",
+        "", []>, Sched<[]> {
+  bits<4> Zd;
+  bits<4> Zn;
+  bits<4> Zm;
+
+  let Inst{31-24} = 0b11000001;
+  let Inst{23-22} = size;
+  let Inst{21}    = 0b1;
+  let Inst{20-17} = Zm;
+  let Inst{16-10} = 0b0111010;
+  let Inst{9-6}   = Zn;
+  let Inst{5}     = 0b0;
+  let Inst{4-1}   = Zd;
+  let Inst{0}     = 0b0;
+}
+
+multiclass sme2_multi2_fmul_sm<string mnemonic> {
+  def _H : sme2_multi2_fmul_sm<0b01, mnemonic, ZZ_h_mul_r, ZPR4b16>;
+  def _S : sme2_multi2_fmul_sm<0b10, mnemonic, ZZ_s_mul_r, ZPR4b32>;
+  def _D : sme2_multi2_fmul_sm<0b11, mnemonic, ZZ_d_mul_r, ZPR4b64>;
+}
+
+class sme2_multi4_fmul_sm<bits<2> size, string mnemonic, RegisterOperand vector_ty, RegisterOperand zpr_ty>
+    : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, zpr_ty:$Zm),
+        mnemonic, "\t$Zd, $Zn, $Zm",
+        "", []>, Sched<[]> {
+  bits<3> Zd;
+  bits<3> Zn;
+  bits<4> Zm;
+
+  let Inst{31-24} = 0b11000001;
+  let Inst{23-22} = size;
+  let Inst{21}    = 0b1;
+  let Inst{20-17} = Zm;
+  let Inst{16-10} = 0b1111010;
+  let Inst{9-7}   = Zn;
+  let Inst{6-5}   = 0b00;
+  let Inst{4-2}   = Zd;
+  let Inst{1-0}   = 0b00;
+}
+
+multiclass sme2_multi4_fmul_sm<string mnemonic> {
+  def _H : sme2_multi4_fmul_sm<0b01, mnemonic, ZZZZ_h_mul_r, ZPR4b16>;
+  def _S : sme2_multi4_fmul_sm<0b10, mnemonic, ZZZZ_s_mul_r, ZPR4b32>;
+  def _D : sme2_multi4_fmul_sm<0b11, mnemonic, ZZZZ_d_mul_r, ZPR4b64>;
+}
+
+multiclass sme2_bfmul_single<string mnemonic> {
+  def _2ZZ  : sme2_multi2_fmul_sm<0b00, mnemonic, ZZ_h_mul_r,   ZPR4b16>;
+  def _4ZZ  : sme2_multi4_fmul_sm<0b00, mnemonic, ZZZZ_h_mul_r, ZPR4b16>;
+}
+
+class sme2_multi2_fmul_mm<bits<2> size, string mnemonic, RegisterOperand vector_ty>
+    : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm),
+        mnemonic, "\t$Zd, $Zn, $Zm",
+        "", []>, Sched<[]> {
+  bits<4> Zd;
+  bits<4> Zn;
+  bits<4> Zm;
+
+  let Inst{31-24} = 0b11000001;
+  let Inst{23-22} = size;
+  let Inst{21}    = 0b1;
+  let Inst{20-17} = Zm;
+  let Inst{16-10} = 0b0111001;
+  let Inst{9-6}   = Zn;
+  let Inst{5}     = 0b0;
+  let Inst{4-1}   = Zd;
+  let Inst{0}     = 0b0;
+}
+
+multiclass sme2_multi2_fmul_mm<string mnemonic> {
+  def _H : sme2_multi2_fmul_mm<0b01, mnemonic, ZZ_h_mul_r>;
+  def _S : sme2_multi2_fmul_mm<0b10, mnemonic, ZZ_s_mul_r>;
+  def _D : sme2_multi2_fmul_mm<0b11, mnemonic, ZZ_d_mul_r>;
+}
+
+class sme2_multi4_fmul_mm<bits<2> size, string mnemonic, RegisterOperand vector_ty>
+    : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm),
+        mnemonic, "\t$Zd, $Zn, $Zm",
+        "", []>, Sched<[]> {
+  bits<3> Zd;
+  bits<3> Zn;
+  bits<3> Zm;
+
+  let Inst{31-24} = 0b11000001;
+  let Inst{23-22} = size;
+  let Inst{21}    = 0b1;
+  let Inst{20-18} = Zm;
+  let Inst{17-10} = 0b01111001;
+  let Inst{9-7}   = Zn;
+  let Inst{6-5}   = 0b00;
+  let Inst{4-2}   = Zd;
+  let Inst{1-0}   = 0b00;
+}
+
+multiclass sme2_multi4_fmul_mm<string mnemonic> {
+  def _H : sme2_multi4_fmul_mm<0b01, mnemonic, ZZZZ_h_mul_r>;
+  def _S : sme2_multi4_fmul_mm<0b10, mnemonic, ZZZZ_s_mul_r>;
+  def _D : sme2_multi4_fmul_mm<0b11, mnemonic, ZZZZ_d_mul_r>;
+}
+
+multiclass sme2_bfmul_multi<string mnemonic> {
+  def _2Z2Z : sme2_multi2_fmul_mm<0b00, mnemonic, ZZ_h_mul_r>;
+  def _4Z4Z : sme2_multi4_fmul_mm<0b00, mnemonic, ZZZZ_h_mul_r>;
+}
+
+class sme2_fp16_quarter_tile_outer_product<bit M, bit N, bit S, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
+    : I<(outs TileOp16:$ZAda),
+        (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
+        mnemonic, "\t$ZAda, $Zn, $Zm",
+        "", []>, Sched<[]> {
+  bit ZAda;
+  bits<3> Zn;
+  bits<3> Zm;
+
+  let Inst{31-21} = 0b10000001000;
+  let Inst{20} = M;
+  let Inst{19-17} = Zm;
+  let Inst{16-10} = 0b0000000;
+  let Inst{9} = N;
+  let Inst{8-6} = Zn;
+  let Inst{5} = 0;
+  let Inst{4} = S;
+  let Inst{3-1} = 0b100;
+  let Inst{0} = ZAda;
+
+  let Constraints = "$ZAda = $_ZAda";
+}
+
+multiclass sme2_fmop4as_fp16_non_widening<bit S, string mnemonic> {
+  // Single vectors
+  def _MZZ_H : sme2_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>;
+
+  // Multiple and single vectors
+  def _M2ZZ_H : sme2_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>;
+
+  // Single and multiple vectors
+  def _MZ2Z_H : sme2_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>;
+
+  // Multiple vectors
+  def _M2Z2Z_H : sme2_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>;
+}
+
+class sme2_fp8_fp32_quarter_tile_outer_product<bit M, bit N, string mnemonic, RegisterOperand zn_ty, RegisterOperand zm_ty>
+    : I<(outs TileOp32:$ZAda),
+        (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm),
+        mnemonic, "\t$ZAda, $Zn, $Zm",
+        "", []>, Sched<[]> {
+  bits<2> ZAda;
+  bits<3> Zn;
+  bits<3> Zm;
+
+  let Inst{31-21} = 0b10000000001;
+  let Inst{20} = M;
+  let Inst{19-17} = Zm;
+  let Inst{16-10} = 0b0000000;
+  let Inst{9} = N;
+  let Inst{8-6} = Zn;
+  let Inst{5-2} = 0b0000;
+  let Inst{1-0} = ZAda;
+
+  let Constraints = "$ZAda = $_ZAda";
+}
+
+multiclass sme2_fmop4a_fp8_fp32_4way<string mnemonic> {
+  // Single vectors
+  def _MZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 0, mnemonic, ZPR8Mul2_Lo, ZPR8Mul2_Hi>;
+
+  // Multiple and single vectors
+  def _M2ZZ_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<0, 1, mnemonic, ZZ_b_mul_r_Lo, ZPR8Mul2_Hi>;
+
+  // Single and multiple vectors
+  def _MZ2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 0, mnemonic, ZPR8Mul2_Lo, ZZ_b_mul_r_Hi>;
+
+  // Multiple vectors
+  def _M2Z2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>;
+}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index fc2e889d3a1a038..9fa184c545705b8 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1046,7 +1046,7 @@ multiclass sve_int_count_v<bits<5> opc, string asm,
                   (!cast<Instruction>(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>;
 }
 
-class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm,
+class sve_int_pcount_pred<bits<2> sz8_64, bits<3> opc, string asm,
                           PPRRegOp pprty>
 : I<(outs GPR64:$Rd), (ins PPRAny:$Pg, pprty:$Pn),
   asm, "\t$Rd, $Pg, $Pn",
@@ -1058,17 +1058,17 @@ class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm,
   let Inst{31-24} = 0b00100101;
   let Inst{23-22} = sz8_64;
   let Inst{21-19} = 0b100;
-  let Inst{18-16} = opc{3-1};
+  let Inst{18-16} = opc{2-0};
   let Inst{15-14} = 0b10;
   let Inst{13-10} = Pg;
-  let Inst{9}     = opc{0};
+  let Inst{9}     = 0b0;
   let Inst{8-5}   = Pn;
   let Inst{4-0}   = Rd;
 
   let hasSideEffects = 0;
 }
 
-multiclass sve_int_pcount_pred<bits<4> opc, string asm,
+multiclass sve_int_pcount_pred<bits<3> opc, string asm,
                                SDPatternOperator int_op> {
   def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>;
   def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>;
@@ -1081,6 +1081,12 @@ multiclass sve_int_pcount_pred<bits<4> opc, string asm,
   def : SVE_2_Op_Pat<i64, int_op, nxv2i1,  nxv2i1,  !cast<Instruction>(NAME # _D)>;
 }
 
+multiclass sve_int_pcount_pred_tmp<bits<3> opc, string asm> {
+  def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>;
+  def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>;
+  def _S : sve_int_pcount_pred<0b10, opc, asm, PPR32>;
+  def _D : sve_int_pcount_pred<0b11, opc, asm, PPR64>;
+}
 //===----------------------------------------------------------------------===//
 // SVE Element Count Group
 //===----------------------------------------------------------------------===//
@@ -3181,6 +3187,41 @@ multiclass sve_fp_z2op_p_zd_frint<bits<2> opc, string asm> {
   def _D : sve_fp_z2op_p_zd<{ 0b0010, opc{1}, 1, opc{0} }, asm, ZPR64, ZPR64>;
 }
 
+multiclass sve_fp_z2op_p_zd_d<bit U, string asm> {
+  def _HtoH : sve_fp_z2op_p_zd<{ 0b011101, U }, asm, ZPR16, ZPR16>;
+  def _HtoS : sve_fp_z2op_p_zd<{ 0b011110, U }, asm, ZPR16, ZPR32>;
+  def _HtoD : sve_fp_z2op_p_zd<{ 0b011111, U }, asm, ZPR16, ZPR64>;
+  def _StoS : sve_fp_z2op_p_zd<{ 0b101110, U }, asm, ZPR32, ZPR32>;
+  def _StoD : sve_fp_z2op_p_zd<{ 0b111110, U }, asm, ZPR32, ZPR64>;
+  def _DtoS : sve_fp_z2op_p_zd<{ 0b111100, U }, asm, ZPR64, ZPR32>;
+  def _DtoD : sve_fp_z2op_p_zd<{ 0b111111, U }, asm, ZPR64, ZPR64>;
+}
+
+multiclass sve_fp_z2op_p_zd_c<bit U, string asm> {
+  def _HtoH : sve_fp_z2op_p_zd<{ 0b011001, U }, asm, ZPR16, ZPR16>;
+  def _StoH : sve_fp_z2op_p_zd<{ 0b011010, U }, asm, ZPR32, ZPR16>;
+  def _StoS : sve_fp_z2op_p_zd<{ 0b101010, U }, asm, ZPR32, ZPR32>;
+  def _StoD : sve_fp_z2op_p_zd<{ 0b111000, U }, asm, ZPR32, ZPR64>;
+  def _DtoS : sve_fp_z2op_p_zd<{ 0b111010, U }, asm, ZPR64, ZPR32>;
+  def _DtoH : sve_fp_z2op_p_zd<{ 0b011011, U }, asm, ZPR64, ZPR16>;
+  def _DtoD : sve_fp_z2op_p_zd<{ 0b111011, U }, asm, ZPR64, ZPR64>;
+}
+
+multiclass sve_fp_z2op_p_zd_d_flogb<string asm> {
+  def _H : sve_fp_z2op_p_zd<0b0011001, asm, ZPR16, ZPR16>;
+  def _S : sve_fp_z2op_p_zd<0b0011010, asm, ZPR32, ZPR32>;
+  def _D : sve_fp_z2op_p_zd<0b0011011, asm, ZPR64, ZPR64>;
+}
+
+multiclass sve_fp_z2op_p_zd_b_0<string asm> {
+  def _StoH : sve_fp_z2op_p_zd<0b1001000, asm, ZPR32, ZPR16>;
+  def _HtoS : sve_fp_z2op_p_zd<0b1001001, asm, ZPR16, ZPR32>;
+  def _DtoH : sve_fp_z2op_p_zd<0b1101000, asm, ZPR64, ZPR16>;
+  def _HtoD : sve_fp_z2op_p_zd<0b1101001, asm, ZPR16, ZPR64>;
+  def _DtoS : sve_fp_z2op_p_zd<0b1101010, asm, ZPR64, ZPR32>;
+  def _StoD : sve_fp_z2op_p_zd<0b1101011, asm, ZPR32, ZPR64>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Integer Arithmetic - Binary Predicated Group
 //===----------------------------------------------------------------------===//
@@ -3938,7 +3979,7 @@ multiclass sve2_int_sadd_long_accum_pairwise<bit U, string asm, SDPatternOperato
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv4i32, !cast<Instruction>(NAME # _D)>;
 }
 
-class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
+class sve2_int_un_pred_arit<bits<2> sz, bits<2> opc,
                             string asm, ZPRRegOp zprty>
 : I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn),
   asm, "\t$Zd, $Pg/m, $Zn",
@@ -3950,23 +3991,44 @@ class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
   let Inst{31-24} = 0b01000100;
   let Inst{23-22} = sz;
   let Inst{21-20} = 0b00;
-  let Inst{19}    = Q;
-  let Inst{18}    = 0b0;
-  let Inst{17-16} = opc;
+  let Inst{19}    = opc{1};
+  let Inst{18-17} = 0b00;
+  let Inst{16}    = opc{0};
   let Inst{15-13} = 0b101;
   let Inst{12-10} = Pg;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
-
   let Constraints = "$Zd = $_Zd";
   let DestructiveInstType = DestructiveUnaryPassthru;
   let ElementSize = zprty.ElementSize;
   let hasSideEffects = 0;
 }
 
-multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm,
+class sve2_int_un_pred_arit_z<bits<2> sz, bits<2> opc,
+                              string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn),
+  asm, "\t$Zd, $Pg/z, $Zn",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Zd;
+  bits<5> Zn;
+  let Inst{31-24} = 0b01000100;
+  let Inst{23-22} = sz;
+  let Inst{21-20} = 0b00;
+  let Inst{19}    = opc{1};
+  let Inst{18-17} = 0b01;
+  let Inst{16}    = opc{0};
+  let Inst{15-13} = 0b101;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+  let hasSideEffects = 0;
+}
+
+multiclass sve2_int_un_pred_arit_s<bits<2> opc, string asm,
                                    SDPatternOperator op> {
-  def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>,
+  def _S : sve2_int_un_pred_arit<0b10, opc, asm, ZPR32>,
            SVEPseudo2Instr<NAME # _S, 1>;
 
   def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
@@ -3976,14 +4038,14 @@ multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm,
   defm : SVE_3_Op_Undef_Pat<nxv4i32, op, nxv4i32, nxv4i1,  nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
 }
 
-multiclass sve2_int_un_pred_arit<bits<3> opc, string asm, SDPatternOperator op> {
-  def _B : sve2_int_un_pred_arit<0b00, opc{2}, opc{1-0}, asm, ZPR8>,
+multiclass sve2_int_un_pred_arit<bits<2> opc, string asm, SDPatternOperator op> {
+  def _B : sve2_int_un_pred_arit<0b00, opc, asm, ZPR8>,
            SVEPseudo2Instr<NAME # _B, 1>;
-  def _H : sve2_int_un_pred_arit<0b01, opc{2}, opc{1-0}, asm, ZPR16>,
+  def _H : sve2_int_un_pred_arit<0b01, opc, asm, ZPR16>,
            SVEPseudo2Instr<NAME # _H, 1>;
-  def _S : sve2_int_un_pred_arit<0b10, opc{2}, opc{1-0}, asm, ZPR32>,
+  def _S : sve2_int_un_pred_arit<0b10, opc, asm, ZPR32>,
            SVEPseudo2Instr<NAME # _S, 1>;
-  def _D : sve2_int_un_pred_arit<0b11, opc{2}, opc{1-0}, asm, ZPR64>,
+  def _D : sve2_int_un_pred_arit<0b11, opc, asm, ZPR64>,
            SVEPseudo2Instr<NAME # _D, 1>;
 
   def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
@@ -4002,6 +4064,13 @@ multiclass sve2_int_un_pred_arit<bits<3> opc, string asm, SDPatternOperator op>
   defm : SVE_3_Op_Undef_Pat<nxv2i64, op, nxv2i64, nxv2i1,  nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
 }
 
+multiclass sve2_int_un_pred_arit_z<bits<2> opc, string asm> {
+  def _B : sve2_int_un_pred_arit_z<0b00, opc, asm, ZPR8>;
+  def _H : sve2_int_un_pred_arit_z<0b01, opc, asm, ZPR16>;
+  def _S : sve2_int_un_pred_arit_z<0b10, opc, asm, ZPR32>;
+  def _D : sve2_int_un_pred_arit_z<0b11, opc, asm, ZPR64>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE2 Widening Integer Arithmetic Group
 //===----------------------------------------------------------------------===//
@@ -4631,8 +4700,30 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
   let hasSideEffects = 0;
 }
 
-multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
-                                  SDPatternOperator op> {
+class sve_int_un_pred_arit_z<bits<2> sz8_64, bits<4> opc,
+                            string asm, ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn),
+  asm, "\t$Zd, $Pg/z, $Zn",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Zd;
+  bits<5> Zn;
+  let Inst{31-24} = 0b00000100;
+  let Inst{23-22} = sz8_64;
+  let Inst{21-20} = 0b00;
+  let Inst{19}    = opc{0};
+  let Inst{18-16} = opc{3-1};
+  let Inst{15-13} = 0b101;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
+}
+
+multiclass sve_int_un_pred_arit<bits<3> opc, string asm,
+                                SDPatternOperator op> {
   def _B : sve_int_un_pred_arit<0b00, { opc, 0b0 }, asm, ZPR8>,
            SVEPseudo2Instr<NAME # _B, 1>;
   def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>,
@@ -4658,8 +4749,15 @@ multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
   defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1,  nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
 }
 
-multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
-                                    SDPatternOperator op> {
+multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm> {
+  def _B : sve_int_un_pred_arit_z<0b00, { opc, 0b0 }, asm, ZPR8>;
+  def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b0 }, asm, ZPR16>;
+  def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>;
+  def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>;
+}
+
+multiclass sve_int_un_pred_arit_h<bits<3> opc, string asm,
+                                  SDPatternOperator op> {
   def _H : sve_int_un_pred_arit<0b01, { opc, 0b0 }, asm, ZPR16>,
            SVEPseudo2Instr<NAME # _H, 1>;
   def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>,
@@ -4680,8 +4778,14 @@ multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
   defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i8, !cast<Pseudo>(NAME # _D_UNDEF)>;
 }
 
-multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
-                                    SDPatternOperator op> {
+multiclass sve_int_un_pred_arit_h_z<bits<3> opc, string asm> {
+  def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b0 }, asm, ZPR16>;
+  def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>;
+  def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>;
+}
+
+multiclass sve_int_un_pred_arit_w<bits<3> opc, string asm,
+                                  SDPatternOperator op> {
   def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>,
            SVEPseudo2Instr<NAME # _S, 1>;
   def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
@@ -4697,8 +4801,13 @@ multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
   defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i16, !cast<Pseudo>(NAME # _D_UNDEF)>;
 }
 
-multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm,
-                                    SDPatternOperator op> {
+multiclass sve_int_un_pred_arit_w_z<bits<3> opc, string asm> {
+  def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>;
+  def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>;
+}
+
+multiclass sve_int_un_pred_arit_d<bits<3> opc, string asm,
+                                  SDPatternOperator op> {
   def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>,
            SVEPseudo2Instr<NAME # _D, 1>;
 
@@ -4709,8 +4818,8 @@ multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm,
   defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i32, !cast<Pseudo>(NAME # _D_UNDEF)>;
 }
 
-multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
-                                  SDPatternOperator op> {
+multiclass sve_int_un_pred_arit_bitwise<bits<3> opc, string asm,
+                                        SDPatternOperator op> {
   def _B : sve_int_un_pred_arit<0b00, { opc, 0b1 }, asm, ZPR8>,
            SVEPseudo2Instr<NAME # _B, 1>;
   def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>,
@@ -4736,7 +4845,15 @@ multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
   defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1,  nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
 }
 
-multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_un_pred_arit_bitwise_z<bits<3> opc, string asm> {
+  def _B : sve_int_un_pred_arit_z<0b00, { opc, 0b1 }, asm, ZPR8>;
+  def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b1 }, asm, ZPR16>;
+  def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
+  def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
+}
+
+multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
+                                           SDPatternOperator op> {
   def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>,
            SVEPseudo2Instr<NAME # _H, 1>;
   def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>,
@@ -4763,6 +4880,12 @@ multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator
   defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _D_UNDEF)>;
 }
 
+multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm> {
+  def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b1 }, asm, ZPR16>;
+  def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
+  def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Integer Wide Immediate - Unpredicated Group
 //===----------------------------------------------------------------------===//
@@ -7198,6 +7321,32 @@ multiclass sve2_int_perm_splice_cons<string asm> {
   def _D : sve2_int_perm_splice_cons<0b11, asm, ZPR64, ZZ_d>;
 }
 
+class sve2_int_perm_expand<bits<2> sz, string asm,
+                           ZPRRegOp zprty>
+: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn),
+  asm, "\t$Zd, $Pg, $Zn",
+  "",
+  []>, Sched<[]> {
+  bits<3> Pg;
+  bits<5> Zn;
+  bits<5> Zd;
+  let Inst{31-24} = 0b00000101;
+  let Inst{23-22} = sz;
+  let Inst{21-13} = 0b110001100;
+  let Inst{12-10} = Pg;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zd;
+
+  let hasSideEffects = 0;
+}
+
+multiclass sve2_int_perm_expand<string asm> {
+  def _B : sve2_int_perm_expand<0b00, asm, ZPR8>;
+  def _H : sve2_int_perm_expand<0b01, asm, ZPR16>;
+  def _S : sve2_int_perm_expand<0b10, asm, ZPR32>;
+  def _D : sve2_int_perm_expand<0b11, asm, ZPR64>;
+}
+
 class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
                        ZPRRegOp zprty>
 : I<(outs zprty:$Zd), (ins zprty:$_Zd, PPR3bAny:$Pg, zprty:$Zn),
@@ -7359,7 +7508,7 @@ multiclass sve_int_perm_cpy_v<string asm, SDPatternOperator op> {
             (!cast<Instruction>(NAME # _H) $passthru, $pg, $splat)>;
 }
 
-class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty>
+class sve_int_perm_compact<bits<2> sz, string asm, ZPRRegOp zprty>
 : I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn),
   asm, "\t$Zd, $Pg, $Zn",
   "",
@@ -7367,8 +7516,8 @@ class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty>
   bits<3> Pg;
   bits<5> Zd;
   bits<5> Zn;
-  let Inst{31-23} = 0b000001011;
-  let Inst{22}    = sz;
+  let Inst{31-24} = 0b00000101;
+  let Inst{23-22} = sz;
   let Inst{21-13} = 0b100001100;
   let Inst{12-10} = Pg;
   let Inst{9-5}   = Zn;
@@ -7377,9 +7526,9 @@ class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty>
   let hasSideEffects = 0;
 }
 
-multiclass sve_int_perm_compact<string asm, SDPatternOperator op> {
-  def _S : sve_int_perm_compact<0b0, asm, ZPR32>;
-  def _D : sve_int_perm_compact<0b1, asm, ZPR64>;
+multiclass sve_int_perm_compact_sd<string asm, SDPatternOperator op> {
+  def _S : sve_int_perm_compact<0b10, asm, ZPR32>;
+  def _D : sve_int_perm_compact<0b11, asm, ZPR64>;
 
   def : SVE_2_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_2_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
@@ -7387,6 +7536,11 @@ multiclass sve_int_perm_compact<string asm, SDPatternOperator op> {
   def : SVE_2_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
+multiclass sve_int_perm_compact_bh<string asm> {
+  def _B : sve_int_perm_compact<0b00, asm, ZPR8>;
+  def _H : sve_int_perm_compact<0b01, asm, ZPR16>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Memory - Contiguous Load Group
 //===----------------------------------------------------------------------===//
@@ -8825,6 +8979,46 @@ class sve_crypto_binary_multi4<bits<4> opc, string asm>
   let hasSideEffects = 0;
 }
 
+class sve_crypto_pmlal_multi<string asm>
+: I<(outs ZZ_q_mul_r:$Zda),
+    (ins ZZ_q_mul_r:$_Zda, ZPR64:$Zn, ZPR64:$Zm),
+  asm,
+  "\t$Zda, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zm;
+  bits<5> Zn;
+  bits<4> Zda;
+  let Inst{31-21} = 0b01000101001;
+  let Inst{20-16} = Zm;
+  let Inst{15-10} = 0b111111;
+  let Inst{9-5}   = Zn;
+  let Inst{4-1}   = Zda;
+  let Inst{0}     = 0b0;
+
+  let Constraints = "$Zda = $_Zda";
+  let hasSideEffects = 0;
+}
+
+class sve_crypto_pmull_multi<string asm>
+: I<(outs ZZ_q_mul_r:$Zd),
+    (ins ZPR64:$Zn, ZPR64:$Zm),
+  asm,
+  "\t$Zd, $Zn, $Zm",
+  "",
+  []>, Sched<[]> {
+  bits<5> Zm;
+  bits<5> Zn;
+  bits<4> Zd;
+  let Inst{31-21} = 0b01000101001;
+  let Inst{20-16} = Zm;
+  let Inst{15-10} = 0b111110;
+  let Inst{9-5}   = Zn;
+  let Inst{4-1}   = Zd;
+  let Inst{0}     = 0b0;
+  let hasSideEffects = 0;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE BFloat16 Group
 //===----------------------------------------------------------------------===//
@@ -8894,30 +9088,6 @@ multiclass sve_float_dot_indexed<bit bf, bits<2> opc, ZPRRegOp src1_ty,
   def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
 }
 
-class sve_bfloat_matmul<string asm>
-: I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
-  asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
-  bits<5> Zm;
-  bits<5> Zda;
-  bits<5> Zn;
-  let Inst{31-21} = 0b01100100011;
-  let Inst{20-16} = Zm;
-  let Inst{15-10} = 0b111001;
-  let Inst{9-5}   = Zn;
-  let Inst{4-0}   = Zda;
-
-  let Constraints = "$Zda = $_Zda";
-  let DestructiveInstType = DestructiveOther;
-  let ElementSize = ElementSizeH;
-  let hasSideEffects = 0;
-  let mayRaiseFPException = 1;
-}
-
-multiclass sve_bfloat_matmul<string asm, SDPatternOperator op> {
-  def NAME : sve_bfloat_matmul<asm>;
-  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16 ,!cast<Instruction>(NAME)>;
-}
-
 class sve_bfloat_convert<bit N, string asm>
 : I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
   asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
@@ -9040,14 +9210,14 @@ multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
 // SVE Floating Point Matrix Multiply Accumulate Group
 //===----------------------------------------------------------------------===//
 
-class sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty>
-: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, zprty:$Zm),
+class sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty>
+: I<(outs zda_ty:$Zda), (ins zda_ty:$_Zda, reg_ty:$Zn, reg_ty:$Zm),
     asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
   bits<5> Zda;
   bits<5> Zn;
   bits<5> Zm;
-  let Inst{31-23} = 0b011001001;
-  let Inst{22}    = sz;
+  let Inst{31-24} = 0b01100100;
+  let Inst{23-22} = opc;
   let Inst{21}    = 1;
   let Inst{20-16} = Zm;
   let Inst{15-10} = 0b111001;
@@ -9056,15 +9226,14 @@ class sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty>
 
   let Constraints = "$Zda = $_Zda";
   let DestructiveInstType = DestructiveOther;
-  let ElementSize = zprty.ElementSize;
   let hasSideEffects = 0;
   let mayRaiseFPException = 1;
 }
 
-multiclass sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty, SDPatternOperator op, ValueType vt> {
-  def NAME : sve_fp_matrix_mla<sz, asm, zprty>;
+multiclass sve_fp_matrix_mla<bits<2> opc, string asm, ZPRRegOp zda_ty, ZPRRegOp reg_ty, SDPatternOperator op, ValueType zda_vt, ValueType reg_vt> {
+  def NAME : sve_fp_matrix_mla<opc, asm, zda_ty, reg_ty>;
 
-  def : SVE_3_Op_Pat<vt, op , vt, vt, vt, !cast<Instruction>(NAME)>;
+  def : SVE_3_Op_Pat<zda_vt, op , zda_vt, reg_vt, reg_vt, !cast<Instruction>(NAME)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -10484,6 +10653,28 @@ class sve2_fp8_mla_long_long_by_indexed_elem<bits<2> TT, string mnemonic>
   let Uses = [FPMR, FPCR];
 }
 
+// FP8 Matrix Multiply-accumulate Group
+class sve2_fp8_mmla<bit opc, ZPRRegOp dst_ty, string mnemonic>
+    : I<(outs dst_ty:$Zda),
+      (ins dst_ty:$_Zda, ZPR8:$Zn, ZPR8:$Zm),
+      mnemonic, "\t$Zda, $Zn, $Zm",
+      "", []>, Sched<[]>{
+  bits<5> Zda;
+  bits<5> Zn;
+  bits<5> Zm;
+  let Inst{31-23} = 0b011001000;
+  let Inst{22}    = opc;
+  let Inst{21}    = 0b1;
+  let Inst{20-16} = Zm;
+  let Inst{15-10} = 0b111000;
+  let Inst{9-5}   = Zn;
+  let Inst{4-0}   = Zda;
+  let Constraints = "$Zda = $_Zda";
+  let DestructiveInstType = DestructiveOther;
+  let ElementSize         = dst_ty.ElementSize;
+  let Uses = [FPMR, FPCR];
+}
+
 class sve_fp8_dot_indexed<bits<4> opc, ZPRRegOp dst_ty, Operand iop_ty, string mnemonic>
 : I<(outs dst_ty:$Zda), (ins dst_ty:$_Zda, ZPR8:$Zn, ZPR3b8:$Zm, iop_ty:$iop),
     mnemonic, "\t$Zda, $Zn, $Zm$iop", "", []>, Sched<[]> {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 687a7339da379d5..6a69b9d2bfc7161 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -358,7 +358,7 @@ struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
 
       const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
           *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
-      if (!CallerInfo)
+      if (!CallerInfo || !CallerInfo->isValidState())
         return false;
 
       Change = Change | clampStateAndIndicateChange(this->getState(),
@@ -449,7 +449,8 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
     // Check for Intrinsics and propagate attributes.
     const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
         *this, this->getIRPosition(), DepClassTy::REQUIRED);
-    if (!AAEdges || AAEdges->hasNonAsmUnknownCallee())
+    if (!AAEdges || !AAEdges->isValidState() ||
+        AAEdges->hasNonAsmUnknownCallee())
       return indicatePessimisticFixpoint();
 
     bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
@@ -465,7 +466,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
       if (IID == Intrinsic::not_intrinsic) {
         const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
             *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
-        if (!AAAMD)
+        if (!AAAMD || !AAAMD->isValidState())
           return indicatePessimisticFixpoint();
         *this &= *AAAMD;
         continue;
@@ -660,7 +661,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
 
       const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
           *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
-      if (!PointerInfoAA)
+      if (!PointerInfoAA || !PointerInfoAA->getState().isValidState())
         return false;
 
       return PointerInfoAA->forallInterferingAccesses(
@@ -717,7 +718,7 @@ struct AAAMDSizeRangeAttribute
 
       const auto *CallerInfo = A.getAAFor<AttributeImpl>(
           *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
-      if (!CallerInfo)
+      if (!CallerInfo || !CallerInfo->isValidState())
         return false;
 
       Change |=
@@ -835,7 +836,8 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
     auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
 
     if (const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
-            *this, IRPosition::function(*F), DepClassTy::REQUIRED)) {
+            *this, IRPosition::function(*F), DepClassTy::REQUIRED);
+        AssumedGroupSize && AssumedGroupSize->isValidState()) {
 
       unsigned Min, Max;
       std::tie(Min, Max) = InfoCache.getWavesPerEU(
@@ -864,7 +866,8 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
           *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
       const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
           *this, IRPosition::function(*Func), DepClassTy::REQUIRED);
-      if (!CallerInfo || !AssumedGroupSize)
+      if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() ||
+          !AssumedGroupSize->isValidState())
         return false;
 
       unsigned Min, Max;
@@ -982,7 +985,8 @@ struct AAAMDGPUNoAGPR
       // TODO: Handle callsite attributes
       const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
           *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
-      return CalleeInfo && CalleeInfo->getAssumed();
+      return CalleeInfo && CalleeInfo->isValidState() &&
+             CalleeInfo->getAssumed();
     };
 
     bool UsedAssumedInformation = false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 21412044d5a0139..80969fce3d77fb5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -22,19 +22,13 @@ def CC_SI_Gfx : CallingConv<[
   // 32 is reserved for the stack pointer
   // 33 is reserved for the frame pointer
   // 34 is reserved for the base pointer
-  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[
-    SGPR4, SGPR5, SGPR6, SGPR7,
-    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
-    SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
-    SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29
-  ]>>>,
-
-  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[
-    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
-    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
-    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
-    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
-  ]>>>,
+  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
+    !foreach(i, !range(4, 30), !cast<Register>("SGPR"#i))  // SGPR4-29
+  >>>,
+
+  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
+    !foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))  // VGPR0-31
+  >>>,
 
   CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16], CCAssignToStack<4, 4>>
 ]>;
@@ -43,93 +37,35 @@ def RetCC_SI_Gfx : CallingConv<[
   CCIfType<[i1], CCPromoteToType<i32>>,
   CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
 
-  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[
-    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
-    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
-    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
-    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
-    VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
-    VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
-    VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
-    VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
-    VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
-    VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
-    VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
-    VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
-    VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
-    VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
-    VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
-    VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
-    VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
-  ]>>>,
+  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
+    !foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))  // VGPR0-135
+  >>>,
 ]>;
 
 def CC_SI_SHADER : CallingConv<[
 
   CCIfType<[i1], CCPromoteToType<i32>>,
-  
-  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[
-    SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
-    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
-    SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
-    SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
-    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
-    SGPR40, SGPR41, SGPR42, SGPR43
-  ]>>>,
+
+  CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
+    !foreach(i, !range(0, 44), !cast<Register>("SGPR"#i))  // SGPR0-43
+  >>>,
 
   // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
-  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<[
-    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
-    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
-    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
-    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
-    VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
-    VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
-    VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
-    VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
-    VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
-    VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
-    VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
-    VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
-    VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
-    VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
-    VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
-    VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
-    VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
-  ]>>>
+  CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, bf16, v2bf16] , CCAssignToReg<
+    !foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))  // VGPR0-135
+  >>>
 ]>;
 
 def RetCC_SI_Shader : CallingConv<[
   CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
-  CCIfType<[i32, i16, v2i16] , CCAssignToReg<[
-    SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
-    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
-    SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
-    SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
-    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
-    SGPR40, SGPR41, SGPR42, SGPR43
-  ]>>,
+  CCIfType<[i32, i16, v2i16] , CCAssignToReg<
+    !foreach(i, !range(0, 44), !cast<Register>("SGPR"#i))  // SGPR0-43
+  >>,
 
   // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
-  CCIfType<[f32, f16, v2f16, bf16, v2bf16] , CCAssignToReg<[
-    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
-    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
-    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
-    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
-    VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
-    VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
-    VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
-    VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
-    VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
-    VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
-    VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
-    VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
-    VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
-    VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
-    VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
-    VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
-    VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
-  ]>>
+  CCIfType<[f32, f16, v2f16, bf16, v2bf16] , CCAssignToReg<
+    !foreach(i, !range(0, 136), !cast<Register>("VGPR"#i))  // VGPR0-135
+  >>
 ]>;
 
 def CSR_AMDGPU_VGPRs : CalleeSavedRegs<
@@ -194,11 +130,9 @@ def CC_AMDGPU_Func : CallingConv<[
     !foreach(i, !range(0, 30), !cast<Register>("SGPR"#i))  // SGPR0-29
   >>>,
 
-  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1, bf16, v2bf16], CCAssignToReg<[
-    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
-    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
-    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
-    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
+  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, i1, bf16, v2bf16], CCAssignToReg<
+    !foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))  // VGPR0-31
+  >>,
   CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1, bf16, v2bf16], CCAssignToStack<4, 4>>
 ]>;
 
@@ -206,11 +140,9 @@ def CC_AMDGPU_Func : CallingConv<[
 def RetCC_AMDGPU_Func : CallingConv<[
   CCIfType<[i1], CCPromoteToType<i32>>,
   CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
-  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, bf16, v2bf16], CCAssignToReg<[
-    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
-    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
-    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
-    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
+  CCIfType<[i32, f32, i16, f16, v2i16, v2f16, bf16, v2bf16], CCAssignToReg<
+    !foreach(i, !range(0, 32), !cast<Register>("VGPR"#i))  // VGPR0-31
+  >>,
 ]>;
 
 def CC_AMDGPU : CallingConv<[
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 0f65df0763cc834..e4b54c7d72b0835 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -512,18 +512,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
 
   for (MVT VT : VectorIntTypes) {
     // Expand the following operations for the current type by default.
-    setOperationAction({ISD::ADD,        ISD::AND,     ISD::FP_TO_SINT,
-                        ISD::FP_TO_UINT, ISD::MUL,     ISD::MULHU,
-                        ISD::MULHS,      ISD::OR,      ISD::SHL,
-                        ISD::SRA,        ISD::SRL,     ISD::ROTL,
-                        ISD::ROTR,       ISD::SUB,     ISD::SINT_TO_FP,
-                        ISD::UINT_TO_FP, ISD::SDIV,    ISD::UDIV,
-                        ISD::SREM,       ISD::UREM,    ISD::SMUL_LOHI,
-                        ISD::UMUL_LOHI,  ISD::SDIVREM, ISD::UDIVREM,
-                        ISD::SELECT,     ISD::VSELECT, ISD::SELECT_CC,
-                        ISD::XOR,        ISD::BSWAP,   ISD::CTPOP,
-                        ISD::CTTZ,       ISD::CTLZ,    ISD::VECTOR_SHUFFLE,
-                        ISD::SETCC},
+    setOperationAction({ISD::ADD,        ISD::AND,          ISD::FP_TO_SINT,
+                        ISD::FP_TO_UINT, ISD::MUL,          ISD::MULHU,
+                        ISD::MULHS,      ISD::OR,           ISD::SHL,
+                        ISD::SRA,        ISD::SRL,          ISD::ROTL,
+                        ISD::ROTR,       ISD::SUB,          ISD::SINT_TO_FP,
+                        ISD::UINT_TO_FP, ISD::SDIV,         ISD::UDIV,
+                        ISD::SREM,       ISD::UREM,         ISD::SMUL_LOHI,
+                        ISD::UMUL_LOHI,  ISD::SDIVREM,      ISD::UDIVREM,
+                        ISD::SELECT,     ISD::VSELECT,      ISD::SELECT_CC,
+                        ISD::XOR,        ISD::BSWAP,        ISD::CTPOP,
+                        ISD::CTTZ,       ISD::CTLZ,         ISD::VECTOR_SHUFFLE,
+                        ISD::SETCC,      ISD::ADDRSPACECAST},
                        VT, Expand);
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index f8744d6a483cffe..7dd7388376f4743 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -1159,7 +1159,6 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
     if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
       if (LI->isVolatile())
         return false;
-
       continue;
     }
 
@@ -1170,12 +1169,19 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
       // Reject if the stored value is not the pointer operand.
       if (SI->getPointerOperand() != Val)
         return false;
-    } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
+      continue;
+    }
+
+    if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
       if (RMW->isVolatile())
         return false;
-    } else if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
+      continue;
+    }
+
+    if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
       if (CAS->isVolatile())
         return false;
+      continue;
     }
 
     // Only promote a select if we know that the other select operand
@@ -1186,6 +1192,7 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
 
       // May need to rewrite constant operands.
       WorkList.push_back(ICmp);
+      continue;
     }
 
     // TODO: If we know the address is only observed through flat pointers, we
@@ -1198,8 +1205,9 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
     if (isa<InsertValueInst>(User) || isa<InsertElementInst>(User))
       return false;
 
+    // TODO: Handle vectors of pointers.
     if (!User->getType()->isPointerTy())
-      continue;
+      return false;
 
     if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
       // Be conservative if an address could be computed outside the bounds of
@@ -1504,6 +1512,8 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
 
       PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);
 
+      assert(isa<PointerType>(V->getType()));
+
       // FIXME: It doesn't really make sense to try to do this for all
       // instructions.
       V->mutateType(NewTy);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 8f9495d83cde2dc..5160851f8c4424d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -75,6 +75,13 @@ static cl::opt<size_t> InlineMaxBB(
     cl::desc("Maximum number of BBs allowed in a function after inlining"
              " (compile time constraint)"));
 
+// This default unroll factor is based on microbenchmarks on gfx1030.
+static cl::opt<unsigned> MemcpyLoopUnroll(
+    "amdgpu-memcpy-loop-unroll",
+    cl::desc("Unroll factor (affecting 4x32-bit operations) to use for memory "
+             "operations when lowering memcpy as a loop"),
+    cl::init(16), cl::Hidden);
+
 static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
                               unsigned Depth = 0) {
   const Instruction *I = dyn_cast<Instruction>(Cond);
@@ -409,13 +416,8 @@ int64_t GCNTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const {
   return 1024;
 }
 
-// FIXME: Really we would like to issue multiple 128-bit loads and stores per
-// iteration. Should we report a larger size and let it legalize?
-//
 // FIXME: Should we use narrower types for local/region, or account for when
 // unaligned access is legal?
-//
-// FIXME: This could use fine tuning and microbenchmarks.
 Type *GCNTTIImpl::getMemcpyLoopLoweringType(
     LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
     unsigned DestAddrSpace, Align SrcAlign, Align DestAlign,
@@ -442,9 +444,22 @@ Type *GCNTTIImpl::getMemcpyLoopLoweringType(
     return FixedVectorType::get(Type::getInt32Ty(Context), 2);
   }
 
-  // Global memory works best with 16-byte accesses. Private memory will also
-  // hit this, although they'll be decomposed.
-  return FixedVectorType::get(Type::getInt32Ty(Context), 4);
+  // Global memory works best with 16-byte accesses.
+  // If the operation has a fixed known length that is large enough, it is
+  // worthwhile to return an even wider type and let legalization lower it into
+  // multiple accesses, effectively unrolling the memcpy loop. Private memory
+  // also hits this, although accesses may be decomposed.
+  //
+  // Don't unroll if Length is not a constant, since unrolling leads to worse
+  // performance for length values that are smaller or slightly larger than the
+  // total size of the type returned here. Mitigating that would require a more
+  // complex lowering for variable-length memcpy and memmove.
+  unsigned I32EltsInVector = 4;
+  if (MemcpyLoopUnroll > 0 && isa<ConstantInt>(Length))
+    return FixedVectorType::get(Type::getInt32Ty(Context),
+                                MemcpyLoopUnroll * I32EltsInVector);
+
+  return FixedVectorType::get(Type::getInt32Ty(Context), I32EltsInVector);
 }
 
 void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
@@ -452,7 +467,6 @@ void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
     unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
     Align SrcAlign, Align DestAlign,
     std::optional<uint32_t> AtomicCpySize) const {
-  assert(RemainingBytes < 16);
 
   if (AtomicCpySize)
     BaseT::getMemcpyLoopResidualLoweringType(
@@ -462,6 +476,12 @@ void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
   Align MinAlign = std::min(SrcAlign, DestAlign);
 
   if (MinAlign != Align(2)) {
+    Type *I32x4Ty = FixedVectorType::get(Type::getInt32Ty(Context), 4);
+    while (RemainingBytes >= 16) {
+      OpsOut.push_back(I32x4Ty);
+      RemainingBytes -= 16;
+    }
+
     Type *I64Ty = Type::getInt64Ty(Context);
     while (RemainingBytes >= 8) {
       OpsOut.push_back(I64Ty);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index dd8d93c3f0b72a0..88caf8196b3c90a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -24,7 +24,7 @@
 using namespace llvm;
 using namespace llvm::AMDGPU;
 
-void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   // FIXME: The current implementation of
   // AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this
   // as an integer or we provide a name which represents a physical register.
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index a72e0fe6ea769f5..4729b8a6aa6f401 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -29,7 +29,7 @@ class AMDGPUInstPrinter : public MCInstPrinter {
                         const MCSubtargetInfo &STI, raw_ostream &O);
   static const char *getRegisterName(MCRegister Reg);
 
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
   static void printRegOperand(MCRegister Reg, raw_ostream &O,
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index e0d1cde28452458..2f342365c3a5af8 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -567,8 +567,7 @@ multiclass MIMG_NoSampler_Src_Helper <mimgopc op, string asm,
           def _V1_gfx12 : VSAMPLE_Sampler_gfx12<op, asm, dst_rc, 1>;
       }
       else {
-        def _V1_gfx12 : VIMAGE_NoSampler_gfx12<op, asm, dst_rc, 1,
-                                               !if(enableDisasm, "GFX12", "")>;
+        def _V1_gfx12 : VIMAGE_NoSampler_gfx12<op, asm, dst_rc, 1>;
       }
     }
   }
@@ -789,8 +788,7 @@ multiclass MIMG_Store_Addr_Helper <mimgopc op, string asm,
         }
       }
       if op.HAS_GFX12 then {
-        def _V1_gfx12 : VIMAGE_Store_gfx12 <op, asm, data_rc, 1,
-                                            !if(enableDisasm, "GFX12", "")>;
+        def _V1_gfx12 : VIMAGE_Store_gfx12 <op, asm, data_rc, 1>;
       }
     }
     let VAddrDwords = 2 in {
@@ -1017,9 +1015,9 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
       }
       if op.HAS_GFX12 then {
         if !empty(renamed) then
-          def _V1_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 1, enableDasm>;
+          def _V1_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 1>;
         else
-          def _V1_gfx12 : VIMAGE_Atomic_gfx12_Renamed <op, renamed, data_rc, 1, enableDasm>;
+          def _V1_gfx12 : VIMAGE_Atomic_gfx12_Renamed <op, renamed, data_rc, 1>;
       }
     }
     let VAddrDwords = 2 in {
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index c912a580854c1c4..f0c7837e0bb75ac 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1793,6 +1793,9 @@ bool SIFoldOperandsImpl::tryFoldOMod(MachineInstr &MI) {
 
   DefOMod->setImm(OMod);
   MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
+  // Kill flags can be wrong if we replaced a def inside a loop with a def
+  // outside the loop.
+  MRI->clearKillFlags(Def->getOperand(0).getReg());
   MI.eraseFromParent();
 
   // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 803eb86e08986cf..52ca38aca5c7711 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1948,13 +1948,9 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
   const DataLayout &DL = DAG.getDataLayout();
   MachineFunction &MF = DAG.getMachineFunction();
   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-
-  const ArgDescriptor *InputPtrReg;
-  const TargetRegisterClass *RC;
-  LLT ArgTy;
   MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
 
-  std::tie(InputPtrReg, RC, ArgTy) =
+  auto [InputPtrReg, RC, ArgTy] =
       Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
 
   // We may not have the kernarg segment argument if we have no kernel
@@ -3335,25 +3331,18 @@ void SITargetLowering::passSpecialInputs(
   // clang-format on
 
   for (auto Attr : ImplicitAttrs) {
-    const ArgDescriptor *OutgoingArg;
-    const TargetRegisterClass *ArgRC;
-    LLT ArgTy;
-
     AMDGPUFunctionArgInfo::PreloadedValue InputID = Attr.first;
 
     // If the callee does not use the attribute value, skip copying the value.
     if (CLI.CB->hasFnAttr(Attr.second))
       continue;
 
-    std::tie(OutgoingArg, ArgRC, ArgTy) =
+    const auto [OutgoingArg, ArgRC, ArgTy] =
         CalleeArgInfo->getPreloadedValue(InputID);
     if (!OutgoingArg)
       continue;
 
-    const ArgDescriptor *IncomingArg;
-    const TargetRegisterClass *IncomingArgRC;
-    LLT Ty;
-    std::tie(IncomingArg, IncomingArgRC, Ty) =
+    const auto [IncomingArg, IncomingArgRC, Ty] =
         CallerArgInfo.getPreloadedValue(InputID);
     assert(IncomingArgRC == ArgRC);
 
@@ -3396,11 +3385,8 @@ void SITargetLowering::passSpecialInputs(
 
   // Pack workitem IDs into a single register or pass it as is if already
   // packed.
-  const ArgDescriptor *OutgoingArg;
-  const TargetRegisterClass *ArgRC;
-  LLT Ty;
 
-  std::tie(OutgoingArg, ArgRC, Ty) =
+  auto [OutgoingArg, ArgRC, Ty] =
       CalleeArgInfo->getPreloadedValue(AMDGPUFunctionArgInfo::WORKITEM_ID_X);
   if (!OutgoingArg)
     std::tie(OutgoingArg, ArgRC, Ty) =
@@ -4460,15 +4446,13 @@ SITargetLowering::emitGWSMemViolTestLoop(MachineInstr &MI,
 
   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
 
-  MachineBasicBlock *LoopBB;
-  MachineBasicBlock *RemainderBB;
   const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
 
   // Apparently kill flags are only valid if the def is in the same block?
   if (MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::data0))
     Src->setIsKill(false);
 
-  std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, *BB, true);
+  auto [LoopBB, RemainderBB] = splitBlockForLoop(MI, *BB, true);
 
   MachineBasicBlock::iterator I = LoopBB->end();
 
@@ -4628,9 +4612,7 @@ loadM0FromVGPR(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineInstr &MI,
       .addReg(Exec);
   // clang-format on
 
-  MachineBasicBlock *LoopBB;
-  MachineBasicBlock *RemainderBB;
-  std::tie(LoopBB, RemainderBB) = splitBlockForLoop(MI, MBB, false);
+  auto [LoopBB, RemainderBB] = splitBlockForLoop(MI, MBB, false);
 
   const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
 
@@ -5755,8 +5737,7 @@ SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
          VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
          VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16);
 
-  SDValue Lo, Hi;
-  std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
+  auto [Lo, Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
 
   SDLoc SL(Op);
   SDValue OpLo = DAG.getNode(Opc, SL, Lo.getValueType(), Lo, Op->getFlags());
@@ -5776,10 +5757,8 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
          VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
          VT == MVT::v32f32 || VT == MVT::v32i16 || VT == MVT::v32f16);
 
-  SDValue Lo0, Hi0;
-  std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
-  SDValue Lo1, Hi1;
-  std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1);
+  auto [Lo0, Hi0] = DAG.SplitVectorOperand(Op.getNode(), 0);
+  auto [Lo1, Hi1] = DAG.SplitVectorOperand(Op.getNode(), 1);
 
   SDLoc SL(Op);
 
@@ -5802,15 +5781,13 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
          VT == MVT::v4bf16 || VT == MVT::v8bf16 || VT == MVT::v16bf16 ||
          VT == MVT::v32bf16);
 
-  SDValue Lo0, Hi0;
   SDValue Op0 = Op.getOperand(0);
-  std::tie(Lo0, Hi0) = Op0.getValueType().isVector()
-                           ? DAG.SplitVectorOperand(Op.getNode(), 0)
-                           : std::pair(Op0, Op0);
-  SDValue Lo1, Hi1;
-  std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1);
-  SDValue Lo2, Hi2;
-  std::tie(Lo2, Hi2) = DAG.SplitVectorOperand(Op.getNode(), 2);
+  auto [Lo0, Hi0] = Op0.getValueType().isVector()
+                        ? DAG.SplitVectorOperand(Op.getNode(), 0)
+                        : std::pair(Op0, Op0);
+
+  auto [Lo1, Hi1] = DAG.SplitVectorOperand(Op.getNode(), 1);
+  auto [Lo2, Hi2] = DAG.SplitVectorOperand(Op.getNode(), 2);
 
   SDLoc SL(Op);
   auto ResVT = DAG.GetSplitDestVTs(VT);
@@ -7427,8 +7404,7 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
 
   if (VecSize == 128 || VecSize == 256 || VecSize == 512) {
     SDValue Lo, Hi;
-    EVT LoVT, HiVT;
-    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
+    auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
 
     if (VecSize == 128) {
       SDValue V2 = DAG.getBitcast(MVT::v2i64, Vec);
@@ -10459,9 +10435,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
     // size.
     switch (Subtarget->getMaxPrivateElementSize()) {
     case 4: {
-      SDValue Ops[2];
-      std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(Load, DAG);
-      return DAG.getMergeValues(Ops, DL);
+      auto [Op0, Op1] = scalarizeVectorLoad(Load, DAG);
+      return DAG.getMergeValues({Op0, Op1}, DL);
     }
     case 8:
       if (NumElements > 2)
@@ -10493,9 +10468,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 
   if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                       MemVT, *Load->getMemOperand())) {
-    SDValue Ops[2];
-    std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(Load, DAG);
-    return DAG.getMergeValues(Ops, DL);
+    auto [Op0, Op1] = expandUnalignedLoad(Load, DAG);
+    return DAG.getMergeValues({Op0, Op1}, DL);
   }
 
   return SDValue();
@@ -12534,8 +12508,7 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
     EVT SrcVT = ExtSrc.getValueType();
     if (SrcVT == MVT::i32) {
       SDLoc SL(N);
-      SDValue LowLHS, HiBits;
-      std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG);
+      auto [LowLHS, HiBits] = split64BitValue(LHS, DAG);
       SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc);
 
       DCI.AddToWorklist(LowOr.getNode());
@@ -13870,8 +13843,7 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
       getMad64_32(DAG, SL, MVT::i64, MulLHSLo, MulRHSLo, AddRHS, MulSignedLo);
 
   if (!MulSignedLo && (!MulLHSUnsigned32 || !MulRHSUnsigned32)) {
-    SDValue AccumLo, AccumHi;
-    std::tie(AccumLo, AccumHi) = DAG.SplitScalar(Accum, SL, MVT::i32, MVT::i32);
+    auto [AccumLo, AccumHi] = DAG.SplitScalar(Accum, SL, MVT::i32, MVT::i32);
 
     if (!MulLHSUnsigned32) {
       auto MulLHSHi =
diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp b/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp
index d76c2810c39f8c6..e669b9479369d63 100644
--- a/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp
+++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp
@@ -93,7 +93,7 @@ static const char *ARCCondCodeToString(ARCCC::CondCode CC) {
   return BadConditionCode(CC);
 }
 
-void ARCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void ARCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   OS << StringRef(getRegisterName(Reg)).lower();
 }
 
diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h b/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
index baf4a6915b70657..c4bd73448ca71b7 100644
--- a/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
+++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h
@@ -30,7 +30,7 @@ class ARCInstPrinter : public MCInstPrinter {
   void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
   static const char *getRegisterName(MCRegister Reg);
 
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
   void printCCOperand(const MCInst *MI, int OpNum, raw_ostream &O);
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 4f366dcffcd29a7..a1d131103239bda 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -3003,6 +3003,17 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
       // on the stack.
       CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
       break;
+    case ARMSubtarget::NoSplit:
+      assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
+             "ABI-required frame pointers need a CSR split when signing return "
+             "address.");
+      CSI.insert(find_if(CSI,
+                         [=](const auto &CS) {
+                           Register Reg = CS.getReg();
+                           return Reg != ARM::LR;
+                         }),
+                 CalleeSavedInfo(ARM::R12));
+      break;
     default:
       llvm_unreachable("Unexpected CSR split with return address signing");
     }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 9adfb1fab5f0847..cec44acc5443770 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -492,17 +492,16 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
   const std::vector<CalleeSavedInfo> CSI =
       MF.getFrameInfo().getCalleeSavedInfo();
 
-  // Returns SplitR7 if the frame setup must be split into two separate pushes
-  // of r0-r7,lr and another containing r8-r11 (+r12 if necessary). This is
-  // always required on Thumb1-only targets, as the push and pop instructions
-  // can't access the high registers. This is also required when R7 is the frame
-  // pointer and frame pointer elimiination is disabled, or branch signing is
-  // enabled and AAPCS is disabled.
-  if ((MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
-       !createAAPCSFrameChain()) ||
-      (getFramePointerReg() == ARM::R7 &&
-       MF.getTarget().Options.DisableFramePointerElim(MF)) ||
-      isThumb1Only())
+  // Thumb1 always splits the pushes at R7, because the Thumb1 push instruction
+  // cannot use high registers except for lr.
+  if (isThumb1Only())
+    return SplitR7;
+
+  // If R7 is the frame pointer, we must split at R7 to ensure that the
+  // previous frame pointer (R7) and return address (LR) are adjacent on the
+  // stack, to form a valid frame record.
+  if (getFramePointerReg() == ARM::R7 &&
+      MF.getTarget().Options.FramePointerIsReserved(MF))
     return SplitR7;
 
   // Returns SplitR11WindowsSEH when the stack pointer needs to be
@@ -515,11 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
       (MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
     return SplitR11WindowsSEH;
 
-  // Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
-  // other in the list of callee saved registers in a frame, and branch
-  // signing is enabled.
+  // Returns SplitR11AAPCSSignRA when the frame pointer is R11, requiring R11
+  // and LR to be adjacent on the stack, and branch signing is enabled,
+  // requiring R12 to be on the stack.
   if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
-      getFramePointerReg() == ARM::R11)
+      getFramePointerReg() == ARM::R11 &&
+      MF.getTarget().Options.FramePointerIsReserved(MF))
     return SplitR11AAPCSSignRA;
   return NoSplit;
 }
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 214c5f1b45e556c..2f7af05a259f8f9 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -95,6 +95,10 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
     /// push {r0-r7, lr}
     /// push {r8-r12}
     /// vpush {d8-d15}
+    /// Note that Thumb1 changes this layout when the frame pointer is R11,
+    /// using a longer sequence of instructions because R11 can't be used by a
+    /// Thumb1 push instruction. This doesn't currently have a separate enum
+    /// value, and is handled entirely within Thumb1FrameLowering::emitPrologue.
     SplitR7,
 
     /// When the stack frame size is not known (because of variable-sized
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 9f6e5e5ab1421c8..0e29648a7a284fe 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2063,6 +2063,7 @@ bool ARMTTIImpl::isLoweredToCall(const Function *F) {
   case Intrinsic::powi:
   case Intrinsic::sin:
   case Intrinsic::cos:
+  case Intrinsic::sincos:
   case Intrinsic::pow:
   case Intrinsic::log:
   case Intrinsic::log10:
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 906519fef45db4d..0df1c336a221462 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -5080,24 +5080,24 @@ ParseStatus ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
   if (Tok.is(AsmToken::Identifier)) {
     StringRef OptStr = Tok.getString();
 
-    Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()).lower())
-      .Case("sy",    ARM_MB::SY)
-      .Case("st",    ARM_MB::ST)
-      .Case("ld",    ARM_MB::LD)
-      .Case("sh",    ARM_MB::ISH)
-      .Case("ish",   ARM_MB::ISH)
-      .Case("shst",  ARM_MB::ISHST)
-      .Case("ishst", ARM_MB::ISHST)
-      .Case("ishld", ARM_MB::ISHLD)
-      .Case("nsh",   ARM_MB::NSH)
-      .Case("un",    ARM_MB::NSH)
-      .Case("nshst", ARM_MB::NSHST)
-      .Case("nshld", ARM_MB::NSHLD)
-      .Case("unst",  ARM_MB::NSHST)
-      .Case("osh",   ARM_MB::OSH)
-      .Case("oshst", ARM_MB::OSHST)
-      .Case("oshld", ARM_MB::OSHLD)
-      .Default(~0U);
+    Opt = StringSwitch<unsigned>(OptStr.lower())
+              .Case("sy", ARM_MB::SY)
+              .Case("st", ARM_MB::ST)
+              .Case("ld", ARM_MB::LD)
+              .Case("sh", ARM_MB::ISH)
+              .Case("ish", ARM_MB::ISH)
+              .Case("shst", ARM_MB::ISHST)
+              .Case("ishst", ARM_MB::ISHST)
+              .Case("ishld", ARM_MB::ISHLD)
+              .Case("nsh", ARM_MB::NSH)
+              .Case("un", ARM_MB::NSH)
+              .Case("nshst", ARM_MB::NSHST)
+              .Case("nshld", ARM_MB::NSHLD)
+              .Case("unst", ARM_MB::NSHST)
+              .Case("osh", ARM_MB::OSH)
+              .Case("oshst", ARM_MB::OSHST)
+              .Case("oshld", ARM_MB::OSHLD)
+              .Default(~0U);
 
     // ishld, oshld, nshld and ld are only available from ARMv8.
     if (!hasV8Ops() && (Opt == ARM_MB::ISHLD || Opt == ARM_MB::OSHLD ||
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
index e4a2f8c8f2ea0ce..5a6895a4ab84ef3 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
@@ -50,7 +50,7 @@ static unsigned translateShiftImm(unsigned imm) {
 }
 
 static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc,
-                             unsigned ShImm, const ARMInstPrinter &printer) {
+                             unsigned ShImm, ARMInstPrinter &printer) {
   if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm))
     return;
   O << ", ";
@@ -81,7 +81,7 @@ bool ARMInstPrinter::applyTargetSpecificCLOption(StringRef Opt) {
   return false;
 }
 
-void ARMInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void ARMInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   markup(OS, Markup::Register) << getRegisterName(Reg, DefaultAltIdx);
 }
 
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
index 494a644cf545462..cd1dddc5f331a33 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h
@@ -27,7 +27,7 @@ class ARMInstPrinter : public MCInstPrinter {
 
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
 
   // Autogenerated by tblgen.
   std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
diff --git a/llvm/lib/Target/CSKY/CSKY.td b/llvm/lib/Target/CSKY/CSKY.td
index 9809caa8bd8f65b..f88daeed8d42154 100644
--- a/llvm/lib/Target/CSKY/CSKY.td
+++ b/llvm/lib/Target/CSKY/CSKY.td
@@ -97,28 +97,28 @@ def iHasFLOAT7E60 : Predicate<"Subtarget->hasFLOAT7E60()">,
              "Support CSKY float7e60 instructions">;
 
 def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
-                                    "Enable divide instrutions">;
+                                    "Enable divide instructions">;
 def HasHWDiv : Predicate<"Subtarget->hasHardwareDivide()">,
                AssemblerPredicate<(all_of FeatureHWDiv),
-               "Enable divide instrutions">;
+               "Enable divide instructions">;
 
 def FeatureSTM : SubtargetFeature<"multiple_stld", "HasSTM", "true",
-                                  "Enable multiple load/store instrutions">;
+                                  "Enable multiple load/store instructions">;
 def HasSTM : Predicate<"Subtarget->hasSTM()">,
              AssemblerPredicate<(all_of FeatureSTM),
-             "Enable multiple load/store instrutions">;
+             "Enable multiple load/store instructions">;
 
 def FeaturePushPop : SubtargetFeature<"pushpop", "HasPushPop", "true",
-                                      "Enable push/pop instrutions">;
+                                      "Enable push/pop instructions">;
 def HasPushPop : Predicate<"Subtarget->hasPushPop()">,
                  AssemblerPredicate<(all_of FeaturePushPop),
-                 "Enable push/pop instrutions">;
+                 "Enable push/pop instructions">;
 
 def FeatureDSP
-    : SubtargetFeature<"edsp", "HasDSP", "true", "Enable DSP instrutions">;
+    : SubtargetFeature<"edsp", "HasDSP", "true", "Enable DSP instructions">;
 def HasDSP : Predicate<"Subtarget->hasDSP()">,
              AssemblerPredicate<(all_of FeatureDSP),
-             "Enable DSP instrutions">;
+             "Enable DSP instructions">;
 
 def HasDSP1E2
     : SubtargetFeature<"dsp1e2", "HasDSP1E2", "true", "Support CSKY dsp1e2 instructions">;
@@ -133,16 +133,16 @@ def iHasDSPE60 : Predicate<"Subtarget->hasDSPE60()">,
              "Support CSKY dspe60 instructions">;
 
 def FeatureDSPV2 : SubtargetFeature<"dspv2", "HasDSPV2", "true",
-                                    "Enable DSP V2.0 instrutions">;
+                                    "Enable DSP V2.0 instructions">;
 def HasDSPV2 : Predicate<"Subtarget->hasDSPV2()">,
                AssemblerPredicate<(all_of FeatureDSPV2),
-               "Enable DSP V2.0 instrutions">;
+               "Enable DSP V2.0 instructions">;
 
 def FeatureDSP_Silan : SubtargetFeature<"dsp_silan", "HasDSP_Silan", "true",
-                                    "Enable DSP Silan instrutions">;
+                                    "Enable DSP Silan instructions">;
 def HasDSP_Silan : Predicate<"Subtarget->hasDSP_Silan()">,
                AssemblerPredicate<(all_of FeatureDSP_Silan),
-               "Enable DSP Silan instrutions">;
+               "Enable DSP Silan instructions">;
 
 // Atomic Support
 def FeatureBTST16 : SubtargetFeature<"btst16", "HasBTST16", "true",
@@ -232,11 +232,11 @@ def FeatureSoftTP : SubtargetFeature<"soft-tp", "ReadTPHard", "false",
                                      "Disable TLS Pointer register">;
 
 def FeatureIstack : SubtargetFeature<"istack", "EnableInterruptAttribute",
-                                     "true", "Enable interrput attribute">;
+                                     "true", "Enable interrupt attribute">;
 def EnableInterruptAttribute
     : Predicate<"Subtarget->enableInterruptAttribute()">,
       AssemblerPredicate<(all_of FeatureIstack),
-      "Enable interrput attribute">;
+      "Enable interrupt attribute">;
 
 def FeatureConstPool : SubtargetFeature<"constpool", "DumpConstPool", "true",
                                         "Dump the constant pool by compiler">;
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
index 9af7958112fce30..a4b0d8488cf53b3 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
@@ -82,7 +82,7 @@ void CSKYInstPrinter::printInst(const MCInst *MI, uint64_t Address,
   printAnnotation(O, Annot);
 }
 
-void CSKYInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const {
+void CSKYInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) {
   if (PrintBranchImmAsAddress)
     O << getRegisterName(Reg, ABIRegNames ? CSKY::ABIRegAltName
                                           : CSKY::NoRegAltName);
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
index 461d7f6f12b371b..16eccfdfb5ce5bc 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
@@ -31,7 +31,7 @@ class CSKYInstPrinter : public MCInstPrinter {
 
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
-  void printRegName(raw_ostream &O, MCRegister Reg) const override;
+  void printRegName(raw_ostream &O, MCRegister Reg) override;
 
   void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O, const char *Modifier = nullptr);
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 147b32b1ca99030..263ca50011aa7b5 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -47,6 +47,7 @@ def ResRetInt32Ty : DXILOpParamType;
 def HandleTy : DXILOpParamType;
 def ResBindTy : DXILOpParamType;
 def ResPropsTy : DXILOpParamType;
+def SplitDoubleTy : DXILOpParamType;
 
 class DXILOpClass;
 
@@ -293,6 +294,43 @@ class Attributes<Version ver = DXIL1_0, list<DXILAttribute> attrs> {
   list<DXILAttribute> op_attrs = attrs;
 }
 
+class DXILConstant<int value_> {
+  int value = value_;
+}
+
+defset list<DXILConstant> BarrierModes = {
+  def BarrierMode_DeviceMemoryBarrier              : DXILConstant<2>;
+  def BarrierMode_DeviceMemoryBarrierWithGroupSync : DXILConstant<3>;
+  def BarrierMode_GroupMemoryBarrier               : DXILConstant<8>;
+  def BarrierMode_GroupMemoryBarrierWithGroupSync  : DXILConstant<9>;
+  def BarrierMode_AllMemoryBarrier                 : DXILConstant<10>;
+  def BarrierMode_AllMemoryBarrierWithGroupSync    : DXILConstant<11>;
+}
+
+// Intrinsic arg selection: an Arg carries an operand index or a DXILConstant.
+class Arg {
+  int index = -1;     // overridden by ArgSelect
+  DXILConstant value; // assigned by ArgI32 and ArgI8
+  bit is_i8 = 0;      // set to 1 by ArgI8
+  bit is_i32 = 0;     // set to 1 by ArgI32
+}
+class ArgSelect<int index_> : Arg {
+  let index = index_;
+}
+class ArgI32<DXILConstant value_> : Arg {
+  let value = value_;
+  let is_i32 = 1;
+}
+class ArgI8<DXILConstant value_> : Arg {
+  let value = value_;
+  let is_i8 = 1;
+}
+
+class IntrinsicSelect<Intrinsic intrinsic_, list<Arg> args_> {
+  Intrinsic intrinsic = intrinsic_;
+  list<Arg> args = args_;
+}
+
 // Abstraction DXIL Operation
 class DXILOp<int opcode, DXILOpClass opclass> {
   // A short description of the operation
@@ -307,6 +345,9 @@ class DXILOp<int opcode, DXILOpClass opclass> {
   // LLVM Intrinsic DXIL Operation maps to
   Intrinsic LLVMIntrinsic = ?;
 
+  // Non-trivial LLVM Intrinsics DXIL Operation maps to
+  list<IntrinsicSelect> intrinsic_selects = [];
+
   // Result type of the op
   DXILOpParamType result;
 
@@ -553,11 +594,10 @@ def Rbits :  DXILOp<30, unary> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
-def CBits :  DXILOp<31, unary> {
+def CountBits :  DXILOp<31, unaryBits> {
   let Doc = "Returns the number of 1 bits in the specified value.";
-  let LLVMIntrinsic = int_ctpop;
   let arguments = [OverloadTy];
-  let result = OverloadTy;
+  let result = Int32Ty;
   let overloads =
       [Overloads<DXIL1_0, [Int16Ty, Int32Ty, Int64Ty]>];
   let stages = [Stages<DXIL1_0, [all_stages]>];
@@ -779,6 +819,15 @@ def FlattenedThreadIdInGroup :  DXILOp<96, flattenedThreadIdInGroup> {
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
 
+def SplitDouble :  DXILOp<102, splitDouble> {
+  let Doc = "Splits a double into 2 uints";
+  let arguments = [OverloadTy];
+  let result = SplitDoubleTy;
+  let overloads = [Overloads<DXIL1_0, [DoubleTy]>];
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadNone]>];
+}
+
 def AnnotateHandle : DXILOp<217, annotateHandle> {
   let Doc = "annotate handle with resource properties";
   let arguments = [HandleTy, ResPropsTy];
@@ -820,3 +869,17 @@ def WaveGetLaneIndex : DXILOp<111, waveGetLaneIndex> {
   let stages = [Stages<DXIL1_0, [all_stages]>];
   let attributes = [Attributes<DXIL1_0, [ReadNone]>];
 }
+
+def Barrier : DXILOp<80, barrier> {
+  let Doc = "inserts a memory barrier in the shader";
+  let intrinsic_selects = [
+    IntrinsicSelect<
+        int_dx_group_memory_barrier_with_group_sync,
+        [ ArgI32<BarrierMode_GroupMemoryBarrierWithGroupSync> ]>,
+  ];
+
+  let arguments = [Int32Ty];
+  let result = VoidTy;
+  let stages = [Stages<DXIL1_0, [compute, library]>];
+  let attributes = [Attributes<DXIL1_0, []>];
+}
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 7719d6b1079110b..5d5bb3eacace258 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -229,6 +229,13 @@ static StructType *getResPropsType(LLVMContext &Context) {
   return StructType::create({Int32Ty, Int32Ty}, "dx.types.ResourceProperties");
 }
 
+static StructType *getSplitDoubleType(LLVMContext &Context) {
+  if (auto *ST = StructType::getTypeByName(Context, "dx.types.splitdouble"))
+    return ST; // Reuse the struct already registered under this name.
+  Type *Int32Ty = Type::getInt32Ty(Context);
+  return StructType::create({Int32Ty, Int32Ty}, "dx.types.splitdouble");
+}
+
 static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
                                     Type *OverloadTy) {
   switch (Kind) {
@@ -266,6 +273,8 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
     return getResBindType(Ctx);
   case OpParamType::ResPropsTy:
     return getResPropsType(Ctx);
+  case OpParamType::SplitDoubleTy:
+    return getSplitDoubleType(Ctx);
   }
   llvm_unreachable("Invalid parameter kind");
   return nullptr;
@@ -467,6 +476,10 @@ StructType *DXILOpBuilder::getResRetType(Type *ElementTy) {
   return ::getResRetType(ElementTy);
 }
 
+StructType *DXILOpBuilder::getSplitDoubleType(LLVMContext &Context) {
+  return ::getSplitDoubleType(Context); // Defer to the file-scope helper.
+}
+
 StructType *DXILOpBuilder::getHandleType() {
   return ::getHandleType(IRB.getContext());
 }
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h
index 037ae3822cfb906..df5a0240870f4a4 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.h
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.h
@@ -49,6 +49,10 @@ class DXILOpBuilder {
 
   /// Get a `%dx.types.ResRet` type with the given element type.
   StructType *getResRetType(Type *ElementTy);
+
+  /// Get the `%dx.types.splitdouble` type.
+  StructType *getSplitDoubleType(LLVMContext &Context);
+
   /// Get the `%dx.types.Handle` type.
   StructType *getHandleType();
 
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index c62ba8c21d67917..b5cf1654181c6c7 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -17,6 +17,7 @@
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/IntrinsicsDirectX.h"
 #include "llvm/IR/Module.h"
@@ -105,8 +106,58 @@ class OpLowerer {
     return false;
   }
 
-  [[nodiscard]]
-  bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp) {
+  struct ArgSelect {
+    enum class Type {
+      Index, // Value is used as an argument-operand index of the call.
+      I8,    // Value is emitted as an i8 constant.
+      I32,   // Value is emitted as an i32 constant.
+    };
+    Type Type = Type::Index;
+    int Value = -1;
+  };
+
+  /// Replace each call to \p F with \p DXILOp. Operands follow \p ArgSelects
+  /// when non-empty; otherwise the call's (possibly flattened) arguments.
+  [[nodiscard]] bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp,
+                                           ArrayRef<ArgSelect> ArgSelects) {
+    bool IsVectorArgExpansion = isVectorArgExpansion(F);
+    return replaceFunction(F, [&](CallInst *CI) -> Error {
+      OpBuilder.getIRB().SetInsertPoint(CI);
+      SmallVector<Value *> Args;
+      if (!ArgSelects.empty()) {
+        for (const ArgSelect &A : ArgSelects) {
+          switch (A.Type) {
+          case ArgSelect::Type::Index:
+            Args.push_back(CI->getArgOperand(A.Value));
+            continue;
+          case ArgSelect::Type::I8:
+            Args.push_back(
+                OpBuilder.getIRB().getInt8(static_cast<uint8_t>(A.Value)));
+            continue;
+          case ArgSelect::Type::I32:
+            Args.push_back(OpBuilder.getIRB().getInt32(A.Value));
+            continue;
+          }
+          llvm_unreachable("Invalid type of intrinsic arg select.");
+        }
+      } else if (IsVectorArgExpansion) {
+        Args = argVectorFlatten(CI, OpBuilder.getIRB());
+      } else {
+        Args.append(CI->arg_begin(), CI->arg_end());
+      }
+      Expected<CallInst *> OpCall =
+          OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), F.getReturnType());
+      if (Error E = OpCall.takeError())
+        return E;
+      CI->replaceAllUsesWith(*OpCall);
+      CI->eraseFromParent();
+      return Error::success();
+    });
+  }
+
+  [[nodiscard]] bool replaceFunctionWithNamedStructOp(
+      Function &F, dxil::OpCode DXILOp, Type *NewRetTy,
+      llvm::function_ref<Error(CallInst *CI, CallInst *Op)> ReplaceUses) {
     bool IsVectorArgExpansion = isVectorArgExpansion(F);
     return replaceFunction(F, [&](CallInst *CI) -> Error {
       SmallVector<Value *> Args;
@@ -118,12 +169,12 @@ class OpLowerer {
         Args.append(CI->arg_begin(), CI->arg_end());
 
       Expected<CallInst *> OpCall =
-          OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), F.getReturnType());
+          OpBuilder.tryCreateOp(DXILOp, Args, CI->getName(), NewRetTy);
       if (Error E = OpCall.takeError())
         return E;
+      if (Error E = ReplaceUses(CI, *OpCall))
+        return E;
 
-      CI->replaceAllUsesWith(*OpCall);
-      CI->eraseFromParent();
       return Error::success();
     });
   }
@@ -263,6 +314,26 @@ class OpLowerer {
     return lowerToBindAndAnnotateHandle(F);
   }
 
+  /// Point every ExtractValueInst user of \p Intrin at \p Op, then erase
+  /// \p Intrin. Returns an error, without erasing \p Intrin, if a user is not
+  /// an ExtractValueInst or does not have exactly one index.
+  Error replaceSplitDoubleCallUsages(CallInst *Intrin, CallInst *Op) {
+    for (Use &U : make_early_inc_range(Intrin->uses())) {
+      if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
+        if (EVI->getNumIndices() != 1)
+          return createStringError(std::errc::invalid_argument,
+                                   "Splitdouble has only 2 elements");
+        EVI->setOperand(0, Op);
+      } else {
+        return make_error<StringError>(
+            "Splitdouble use is not ExtractValueInst",
+            inconvertibleErrorCode());
+      }
+    }
+    Intrin->eraseFromParent();
+    return Error::success();
+  }
+
   /// Replace uses of \c Intrin with the values in the `dx.ResRet` of \c Op.
   /// Since we expect to be post-scalarization, make an effort to avoid vectors.
   Error replaceResRetUses(CallInst *Intrin, CallInst *Op, bool HasCheckBit) {
@@ -460,6 +531,73 @@ class OpLowerer {
     });
   }
 
+  /// Lower calls to the ctpop intrinsic \p F to the CountBits DXIL op. The op
+  /// is created with an i32 (or i32 vector) result, so for 16- and 64-bit
+  /// ctpop the users are rewritten through existing or newly emitted casts.
+  [[nodiscard]] bool lowerCtpopToCountBits(Function &F) {
+    IRBuilder<> &IRB = OpBuilder.getIRB();
+    Type *Int32Ty = IRB.getInt32Ty();
+    return replaceFunction(F, [&](CallInst *CI) -> Error {
+      IRB.SetInsertPoint(CI);
+      SmallVector<Value *> Args;
+      Args.append(CI->arg_begin(), CI->arg_end());
+
+      Type *RetTy = Int32Ty;
+      Type *FRT = F.getReturnType();
+      if (const auto *VT = dyn_cast<VectorType>(FRT))
+        RetTy = VectorType::get(RetTy, VT);
+
+      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
+          dxil::OpCode::CountBits, Args, CI->getName(), RetTy);
+      if (Error E = OpCall.takeError())
+        return E;
+
+      // If the result type is 32 bits we can do a direct replacement.
+      if (FRT->isIntOrIntVectorTy(32)) {
+        CI->replaceAllUsesWith(*OpCall);
+        CI->eraseFromParent();
+        return Error::success();
+      }
+
+      unsigned CastOp;
+      unsigned CastOp2;
+      if (FRT->isIntOrIntVectorTy(16)) {
+        CastOp = Instruction::ZExt;
+        CastOp2 = Instruction::SExt;
+      } else { // must be 64 bits
+        assert(FRT->isIntOrIntVectorTy(64) &&
+               "Currently only lowering 16, 32, or 64 bit ctpop to CountBits "
+               "is supported.");
+        CastOp = Instruction::Trunc;
+        CastOp2 = Instruction::Trunc;
+      }
+      // It is correct to replace the ctpop with the dxil op and
+      // remove all casts to i32
+      bool NeedsCast = false;
+      for (User *U : make_early_inc_range(CI->users())) {
+        auto *I = dyn_cast<Instruction>(U);
+        if (I && (I->getOpcode() == CastOp || I->getOpcode() == CastOp2) &&
+            I->getType() == RetTy) {
+          I->replaceAllUsesWith(*OpCall);
+          I->eraseFromParent();
+        } else {
+          NeedsCast = true;
+        }
+      }
+      // It is correct to replace a ctpop with the dxil op and
+      // a cast from i32 to the return type of the ctpop
+      // the cast is emitted here if there is a non-cast to i32
+      // instr which uses the ctpop
+      if (NeedsCast) {
+        Value *Cast = IRB.CreateZExtOrTrunc(*OpCall, FRT, "ctpop.cast");
+        CI->replaceAllUsesWith(Cast);
+      }
+      CI->eraseFromParent();
+      return Error::success();
+    });
+  }
+
   bool lowerIntrinsics() {
     bool Updated = false;
     bool HasErrors = false;
@@ -471,9 +609,10 @@ class OpLowerer {
       switch (ID) {
       default:
         continue;
-#define DXIL_OP_INTRINSIC(OpCode, Intrin)                                      \
+#define DXIL_OP_INTRINSIC(OpCode, Intrin, ...)                                 \
   case Intrin:                                                                 \
-    HasErrors |= replaceFunctionWithOp(F, OpCode);                             \
+    HasErrors |=                                                               \
+        replaceFunctionWithOp(F, OpCode, ArrayRef<ArgSelect>{__VA_ARGS__});    \
     break;
 #include "DXILOperation.inc"
       case Intrinsic::dx_handle_fromBinding:
@@ -488,6 +627,19 @@ class OpLowerer {
       case Intrinsic::dx_typedBufferStore:
         HasErrors |= lowerTypedBufferStore(F);
         break;
+      // TODO: this can be removed when
+      // https://github.com/llvm/llvm-project/issues/113192 is fixed
+      case Intrinsic::dx_splitdouble:
+        HasErrors |= replaceFunctionWithNamedStructOp(
+            F, OpCode::SplitDouble,
+            OpBuilder.getSplitDoubleType(M.getContext()),
+            [&](CallInst *CI, CallInst *Op) {
+              return replaceSplitDoubleCallUsages(CI, Op);
+            });
+        break;
+      case Intrinsic::ctpop:
+        HasErrors |= lowerCtpopToCountBits(F);
+        break;
       }
       Updated = true;
     }
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
index de5c8b86978a827..e4e84a80b5d0bc7 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
 #define GET_INSTRUCTION_NAME
 #include "HexagonGenAsmWriter.inc"
 
-void HexagonInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const {
+void HexagonInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) {
   O << getRegisterName(Reg);
 }
 
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
index 38a9081c93fe794..fe37cd91dabc6ad 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h
@@ -30,7 +30,7 @@ class HexagonInstPrinter : public MCInstPrinter {
 
   void printInst(MCInst const *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
-  void printRegName(raw_ostream &O, MCRegister Reg) const override;
+  void printRegName(raw_ostream &O, MCRegister Reg) override;
 
   static char const *getRegisterName(MCRegister Reg);
 
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
index 0265a75fb346c9f..4b5751eaedda052 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
 #define PRINT_ALIAS_INSTR
 #include "LanaiGenAsmWriter.inc"
 
-void LanaiInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void LanaiInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   OS << StringRef(getRegisterName(Reg)).lower();
 }
 
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
index 55a254036fee5b6..851613b27e3dd90 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h
@@ -49,7 +49,7 @@ class LanaiInstPrinter : public MCInstPrinter {
                                unsigned OpIdx, unsigned PrintMethodIdx,
                                raw_ostream &O);
   static const char *getRegisterName(MCRegister Reg);
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
 
 private:
   bool printAlias(const MCInst *MI, raw_ostream &Ostream);
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp
index cb2521db5217e82..e3007cfe3d401b8 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp
@@ -56,7 +56,7 @@ void LoongArchInstPrinter::printInst(const MCInst *MI, uint64_t Address,
   printAnnotation(O, Annot);
 }
 
-void LoongArchInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const {
+void LoongArchInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) {
   O << '$' << getRegisterName(Reg);
 }
 
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
index 4e6092bfcb12829..8cda3fdb4510e59 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h
@@ -28,7 +28,7 @@ class LoongArchInstPrinter : public MCInstPrinter {
 
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
-  void printRegName(raw_ostream &O, MCRegister Reg) const override;
+  void printRegName(raw_ostream &O, MCRegister Reg) override;
   void printAtomicMemOp(const MCInst *MI, unsigned OpNo,
                         const MCSubtargetInfo &STI, raw_ostream &O);
 
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
index 84800fc762cbb81..68ac15b57508c18 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
@@ -41,7 +41,7 @@ using namespace llvm;
 #define PRINT_ALIAS_INSTR
 #include "M68kGenAsmWriter.inc"
 
-void M68kInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void M68kInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   OS << "%" << getRegisterName(Reg);
 }
 
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h
index 0963176304587c2..d6d17ca9568e024 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h
@@ -34,7 +34,7 @@ class M68kInstPrinter : public MCInstPrinter,
   void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
   static const char *getRegisterName(MCRegister Reg);
 
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
 
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
index 3726c600f4a7b82..d8a27f34c6fd13f 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
 #define PRINT_ALIAS_INSTR
 #include "MSP430GenAsmWriter.inc"
 
-void MSP430InstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const {
+void MSP430InstPrinter::printRegName(raw_ostream &O, MCRegister Reg) {
   O << getRegisterName(Reg);
 }
 
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
index 40605b92bcb01a6..413492b8efeedaf 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
@@ -22,7 +22,7 @@ namespace llvm {
                       const MCRegisterInfo &MRI)
       : MCInstPrinter(MAI, MII, MRI) {}
 
-    void printRegName(raw_ostream &O, MCRegister Reg) const override;
+    void printRegName(raw_ostream &O, MCRegister Reg) override;
 
     void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                    const MCSubtargetInfo &STI, raw_ostream &O) override;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
index 1518a539782efb5..2fd1b344eb687ed 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp
@@ -72,7 +72,7 @@ const char* Mips::MipsFCCToString(Mips::CondCode CC) {
   llvm_unreachable("Impossible condition code!");
 }
 
-void MipsInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void MipsInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   markup(OS, Markup::Register)
       << '$' << StringRef(getRegisterName(Reg)).lower();
 }
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
index 0652b237509fe3f..8e3b4614a4aade6 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h
@@ -84,7 +84,7 @@ class MipsInstPrinter : public MCInstPrinter {
                         const MCSubtargetInfo &STI, raw_ostream &O);
   static const char *getRegisterName(MCRegister Reg);
 
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
 
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 9b589284463294c..4211ae5a2eebcde 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -34,7 +34,7 @@ NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
                                    const MCRegisterInfo &MRI)
     : MCInstPrinter(MAI, MII, MRI) {}
 
-void NVPTXInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void NVPTXInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   // Decode the virtual register
   // Must be kept in sync with NVPTXAsmPrinter::encodeVirtualRegister
   unsigned RCId = (Reg.id() >> 28);
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index e8a4a6dbdd5324b..63207e8a975ace2 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -24,7 +24,7 @@ class NVPTXInstPrinter : public MCInstPrinter {
   NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
                    const MCRegisterInfo &MRI);
 
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &OS) override;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 93c2d92ef7c1c84..965ed98630a28d9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -126,8 +126,6 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
     if (tryLoadVector(N))
       return;
     break;
-  case NVPTXISD::LDGV2:
-  case NVPTXISD::LDGV4:
   case NVPTXISD::LDUV2:
   case NVPTXISD::LDUV4:
     if (tryLDGLDU(N))
@@ -550,9 +548,6 @@ bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
   switch (IID) {
   default:
     return false;
-  case Intrinsic::nvvm_ldg_global_f:
-  case Intrinsic::nvvm_ldg_global_i:
-  case Intrinsic::nvvm_ldg_global_p:
   case Intrinsic::nvvm_ldu_global_f:
   case Intrinsic::nvvm_ldu_global_i:
   case Intrinsic::nvvm_ldu_global_p:
@@ -1559,34 +1554,11 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
 }
 
 bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
-  SDValue Op1;
-  MemSDNode *Mem;
-  bool IsLDG = true;
+  auto *Mem = cast<MemSDNode>(N);
 
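-  // If this is an LDG intrinsic, the address is the third operand. If its an
-  // LDG/LDU SD node (from custom vector handling), then its the second operand
+  // If this is an LDU intrinsic, the address is the third operand. If it's an
+  // LDG/LDU SD node (from custom vector handling), then it's the second operand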
-  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
-    Op1 = N->getOperand(2);
-    Mem = cast<MemIntrinsicSDNode>(N);
-    unsigned IID = N->getConstantOperandVal(1);
-    switch (IID) {
-    default:
-      return false;
-    case Intrinsic::nvvm_ldg_global_f:
-    case Intrinsic::nvvm_ldg_global_i:
-    case Intrinsic::nvvm_ldg_global_p:
-      IsLDG = true;
-      break;
-    case Intrinsic::nvvm_ldu_global_f:
-    case Intrinsic::nvvm_ldu_global_i:
-    case Intrinsic::nvvm_ldu_global_p:
-      IsLDG = false;
-      break;
-    }
-  } else {
-    Op1 = N->getOperand(1);
-    Mem = cast<MemSDNode>(N);
-  }
+  SDValue Op1 = N->getOperand(N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 2 : 1);
 
   EVT OrigType = N->getValueType(0);
   EVT EltVT = Mem->getMemoryVT();
@@ -1629,26 +1601,20 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
     default:
       return false;
     case ISD::LOAD:
+      Opcode = pickOpcodeForVT(
+          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
+          NVPTX::INT_PTX_LDG_GLOBAL_i16avar, NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
+          NVPTX::INT_PTX_LDG_GLOBAL_i64avar, NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
+          NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
+      break;
     case ISD::INTRINSIC_W_CHAIN:
-      if (IsLDG)
-        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
-                                 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
-                                 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
-                                 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
-                                 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
-                                 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
-      else
-        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
-                                 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
-                                 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
-                                 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
-                                 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
-                                 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
+      Opcode = pickOpcodeForVT(
+          EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
+          NVPTX::INT_PTX_LDU_GLOBAL_i16avar, NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
+          NVPTX::INT_PTX_LDU_GLOBAL_i64avar, NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
+          NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
       break;
     case NVPTXISD::LoadV2:
-    case NVPTXISD::LDGV2:
       Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
                                NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
@@ -1667,7 +1633,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
                                NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
       break;
     case NVPTXISD::LoadV4:
-    case NVPTXISD::LDGV4:
       Opcode = pickOpcodeForVT(
           EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
           NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
@@ -1693,26 +1658,24 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
       default:
         return false;
       case ISD::LOAD:
+        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
+        break;
       case ISD::INTRINSIC_W_CHAIN:
-        if (IsLDG)
-          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                       NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
-                                       NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
-                                       NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
-                                       NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
-                                       NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
-                                       NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
-        else
-          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                       NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
-                                       NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
-                                       NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
-                                       NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
-                                       NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
-                                       NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
+        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
         break;
       case NVPTXISD::LoadV2:
-      case NVPTXISD::LDGV2:
         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                      NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
                                      NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
@@ -1731,7 +1694,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
                                      NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
         break;
       case NVPTXISD::LoadV4:
-      case NVPTXISD::LDGV4:
         Opcode = pickOpcodeForVT(
             EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
             NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
@@ -1751,26 +1713,20 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
       default:
         return false;
       case ISD::LOAD:
+        Opcode = pickOpcodeForVT(
+            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
+            NVPTX::INT_PTX_LDG_GLOBAL_i16ari, NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
+            NVPTX::INT_PTX_LDG_GLOBAL_i64ari, NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
+            NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
+        break;
       case ISD::INTRINSIC_W_CHAIN:
-        if (IsLDG)
-          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                   NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
-                                   NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
-                                   NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
-                                   NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
-                                   NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
-                                   NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
-        else
-          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                   NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
-                                   NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
-                                   NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
-                                   NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
-                                   NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
-                                   NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
+        Opcode = pickOpcodeForVT(
+            EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
+            NVPTX::INT_PTX_LDU_GLOBAL_i16ari, NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
+            NVPTX::INT_PTX_LDU_GLOBAL_i64ari, NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
+            NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
         break;
       case NVPTXISD::LoadV2:
-      case NVPTXISD::LDGV2:
         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                  NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
                                  NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
@@ -1789,7 +1745,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
                                  NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
         break;
       case NVPTXISD::LoadV4:
-      case NVPTXISD::LDGV4:
         Opcode = pickOpcodeForVT(
             EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
             NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
@@ -1815,26 +1770,24 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
       default:
         return false;
       case ISD::LOAD:
+        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
+        break;
       case ISD::INTRINSIC_W_CHAIN:
-        if (IsLDG)
-          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                       NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
-                                       NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
-                                       NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
-                                       NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
-                                       NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
-                                       NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
-        else
-          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                       NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
-                                       NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
-                                       NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
-                                       NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
-                                       NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
-                                       NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
+        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
         break;
       case NVPTXISD::LoadV2:
-      case NVPTXISD::LDGV2:
         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                      NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
                                      NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
@@ -1853,7 +1806,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
                                      NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
         break;
       case NVPTXISD::LoadV4:
-      case NVPTXISD::LDGV4:
         Opcode = pickOpcodeForVT(
             EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
             NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
@@ -1873,26 +1825,24 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
       default:
         return false;
       case ISD::LOAD:
+        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
+                                 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
+        break;
       case ISD::INTRINSIC_W_CHAIN:
-        if (IsLDG)
-          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                   NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
-                                   NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
-                                   NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
-                                   NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
-                                   NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
-                                   NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
-        else
-          Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
-                                   NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
-                                   NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
-                                   NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
-                                   NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
-                                   NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
-                                   NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
+        Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
+                                 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
         break;
       case NVPTXISD::LoadV2:
-      case NVPTXISD::LDGV2:
         Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
                                  NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
                                  NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
@@ -1911,7 +1861,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
                                  NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
         break;
       case NVPTXISD::LoadV4:
-      case NVPTXISD::LDGV4:
         Opcode = pickOpcodeForVT(
             EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
             NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 57bc5fe0ac361c2..a95cba586b8fc39 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -949,8 +949,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(NVPTXISD::ProxyReg)
     MAKE_CASE(NVPTXISD::LoadV2)
     MAKE_CASE(NVPTXISD::LoadV4)
-    MAKE_CASE(NVPTXISD::LDGV2)
-    MAKE_CASE(NVPTXISD::LDGV4)
     MAKE_CASE(NVPTXISD::LDUV2)
     MAKE_CASE(NVPTXISD::LDUV4)
     MAKE_CASE(NVPTXISD::StoreV2)
@@ -4774,26 +4772,6 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
 
     return true;
   }
-  case Intrinsic::nvvm_ldg_global_i:
-  case Intrinsic::nvvm_ldg_global_f:
-  case Intrinsic::nvvm_ldg_global_p: {
-    auto &DL = I.getDataLayout();
-
-    Info.opc = ISD::INTRINSIC_W_CHAIN;
-    if (Intrinsic == Intrinsic::nvvm_ldg_global_i)
-      Info.memVT = getValueType(DL, I.getType());
-    else if(Intrinsic == Intrinsic::nvvm_ldg_global_p)
-      Info.memVT = getPointerTy(DL);
-    else
-      Info.memVT = getValueType(DL, I.getType());
-    Info.ptrVal = I.getArgOperand(0);
-    Info.offset = 0;
-    Info.flags = MachineMemOperand::MOLoad;
-    Info.align = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue();
-
-    return true;
-  }
-
   case Intrinsic::nvvm_tex_1d_v4f32_s32:
   case Intrinsic::nvvm_tex_1d_v4f32_f32:
   case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
@@ -6308,9 +6286,6 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
   switch (IntrinNo) {
   default:
     return;
-  case Intrinsic::nvvm_ldg_global_i:
-  case Intrinsic::nvvm_ldg_global_f:
-  case Intrinsic::nvvm_ldg_global_p:
   case Intrinsic::nvvm_ldu_global_i:
   case Intrinsic::nvvm_ldu_global_f:
   case Intrinsic::nvvm_ldu_global_p: {
@@ -6339,37 +6314,11 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
       default:
         return;
       case 2:
-        switch (IntrinNo) {
-        default:
-          return;
-        case Intrinsic::nvvm_ldg_global_i:
-        case Intrinsic::nvvm_ldg_global_f:
-        case Intrinsic::nvvm_ldg_global_p:
-          Opcode = NVPTXISD::LDGV2;
-          break;
-        case Intrinsic::nvvm_ldu_global_i:
-        case Intrinsic::nvvm_ldu_global_f:
-        case Intrinsic::nvvm_ldu_global_p:
-          Opcode = NVPTXISD::LDUV2;
-          break;
-        }
+        Opcode = NVPTXISD::LDUV2;
         LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
         break;
       case 4: {
-        switch (IntrinNo) {
-        default:
-          return;
-        case Intrinsic::nvvm_ldg_global_i:
-        case Intrinsic::nvvm_ldg_global_f:
-        case Intrinsic::nvvm_ldg_global_p:
-          Opcode = NVPTXISD::LDGV4;
-          break;
-        case Intrinsic::nvvm_ldu_global_i:
-        case Intrinsic::nvvm_ldu_global_f:
-        case Intrinsic::nvvm_ldu_global_p:
-          Opcode = NVPTXISD::LDUV4;
-          break;
-        }
+        Opcode = NVPTXISD::LDUV4;
         EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
         LdResVTs = DAG.getVTList(ListVTs);
         break;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 8c3a597ce0b085b..824a659671967a0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -70,8 +70,6 @@ enum NodeType : unsigned {
 
   LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
   LoadV4,
-  LDGV2, // LDG.v2
-  LDGV4, // LDG.v4
   LDUV2, // LDU.v2
   LDUV4, // LDU.v4
   StoreV2,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 5f6cba397c53529..1ca3aefb0b09344 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1922,7 +1922,7 @@ def imem : Operand<iPTR> {
   let PrintMethod = "printOperand";
 }
 
-def imemAny : Operand<iPTRAny> {
+def imemAny : Operand<pAny> {
   let PrintMethod = "printOperand";
 }
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 1d6f39b290536af..a5a147da8da1c5b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -332,7 +332,7 @@ void NVPTXPassConfig::addIRPasses() {
   disablePass(&PrologEpilogCodeInserterID);
   disablePass(&MachineLateInstrsCleanupID);
   disablePass(&MachineCopyPropagationID);
-  disablePass(&TailDuplicateID);
+  disablePass(&TailDuplicateLegacyID);
   disablePass(&StackMapLivenessID);
   disablePass(&PostRAMachineSinkingID);
   disablePass(&PostRASchedulerID);
@@ -461,7 +461,7 @@ void NVPTXPassConfig::addOptimizedRegAlloc() {
 
 void NVPTXPassConfig::addMachineSSAOptimization() {
   // Pre-ra tail duplication.
-  if (addPass(&EarlyTailDuplicateID))
+  if (addPass(&EarlyTailDuplicateLegacyID))
     printAndVerify("After Pre-RegAlloc TailDuplicate");
 
   // Optimize PHIs before DCE: removing dead PHI cycles may make more
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 9a4291c90408d60..7511e24f705c18b 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -47,7 +47,7 @@ FullRegNamesWithPercent("ppc-reg-with-percent-prefix", cl::Hidden,
 #define PRINT_ALIAS_INSTR
 #include "PPCGenAsmWriter.inc"
 
-void PPCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void PPCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   const char *RegName = getRegisterName(Reg);
   OS << RegName;
 }
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
index 6ba3eb4c79dc990..1b9365fa04961c9 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
@@ -31,7 +31,7 @@ class PPCInstPrinter : public MCInstPrinter {
                  const MCRegisterInfo &MRI, Triple T)
     : MCInstPrinter(MAI, MII, MRI), TT(T) {}
 
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index ab31898e262e7ed..d8f3095ed7fb68a 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -200,6 +200,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   setOperationAction(ISD::UADDO, isPPC64 ? MVT::i64 : MVT::i32, Custom);
 
+  // On P10, the default lowering generates better code using the
+  // setbc instruction.
+  if (!Subtarget.hasP10Vector() && isPPC64)
+    setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+
   // Match BITREVERSE to customized fast code sequence in the td file.
   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
@@ -12016,6 +12021,36 @@ SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const {
   return Res;
 }
 
+/// Custom-lower an i32 ISD::SSUBO into a 64-bit subtract plus a carry-based
+/// overflow test. Returns the merged pair {difference, overflow flag}.
+SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  // The operands must be sign-extended: ANY_EXTEND leaves the upper 32 bits
+  // undefined, which would make the comparison against Extsw below unreliable.
+  SDValue LHS64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Op.getOperand(0));
+  SDValue RHS64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Op.getOperand(1));
+  SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i64, LHS64, RHS64);
+  // Xor is nonzero iff the 64-bit difference does not fit in a signed i32.
+  SDValue Extsw = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, Sub,
+                              DAG.getValueType(MVT::i32));
+  SDValue Xor = DAG.getNode(ISD::XOR, dl, MVT::i64, Extsw, Sub);
+  // Xor + (-1) carries out iff Xor != 0. NOTE(review): presumably the SUBE is
+  // selected as subfe, turning that carry into the 0/1 flag -- confirm.
+  SDValue Addic = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(MVT::i64, MVT::Glue),
+                              Xor, DAG.getConstant(-1, dl, MVT::i64));
+  SDValue Overflow =
+      DAG.getNode(ISD::SUBE, dl, DAG.getVTList(MVT::i64, MVT::Glue), Xor, Addic,
+                  Addic.getValue(1));
+
+  SDValue OverflowTrunc =
+      DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
+  SDValue SubTrunc =
+      (Sub->getValueType(0) != Op.getNode()->getValueType(0))
+          ? DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(0), Sub)
+          : Sub;
+  return DAG.getMergeValues({SubTrunc, OverflowTrunc}, dl);
+}
+
 /// LowerOperation - Provide custom lowering hooks for some operations.
 ///
 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
@@ -12038,6 +12073,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SETCC:              return LowerSETCC(Op, DAG);
   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
+  case ISD::SSUBO:
+    return LowerSSUBO(Op, DAG);
 
   case ISD::INLINEASM:
   case ISD::INLINEASM_BR:       return LowerINLINEASM(Op, DAG);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 0adbad868459731..dde45e4cf6f4ae6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1279,6 +1279,7 @@ namespace llvm {
     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index d82f78498418da7..b3a6cd40ea039b0 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -330,7 +330,18 @@ enum OperandType : unsigned {
   OPERAND_RVKRNUM_1_10,
   OPERAND_RVKRNUM_2_14,
   OPERAND_SPIMM,
-  OPERAND_LAST_RISCV_IMM = OPERAND_SPIMM,
+  // Operand is a 3-bit rounding mode, '111' indicates FRM register.
+  // Represents 'frm' argument passing to floating-point operations.
+  OPERAND_FRMARG,
+  // Operand is a 3-bit rounding mode where only RTZ is valid.
+  OPERAND_RTZARG,
+  // Condition code used by select and short forward branch pseudos.
+  OPERAND_COND_CODE,
+  // Vector policy operand.
+  OPERAND_VEC_POLICY,
+  // Vector SEW operand.
+  OPERAND_SEW,
+  OPERAND_LAST_RISCV_IMM = OPERAND_SEW,
   // Operand is either a register or uimm5, this is used by V extension pseudo
   // instructions to represent a value that be passed as AVL to either vsetvli
   // or vsetivli.
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index 1f27c934baf0dc4..1445e9da4a622de 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -75,7 +75,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, uint64_t Address,
   printAnnotation(O, Annot);
 }
 
-void RISCVInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const {
+void RISCVInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) {
   markup(O, Markup::Register) << getRegisterName(Reg);
 }
 
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
index 77cc7a67e88920d..c15fd591b9e9565 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
@@ -28,7 +28,7 @@ class RISCVInstPrinter : public MCInstPrinter {
 
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
-  void printRegName(raw_ostream &O, MCRegister Reg) const override;
+  void printRegName(raw_ostream &O, MCRegister Reg) override;
 
   void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O, const char *Modifier = nullptr);
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 5dcec078856eadd..eb3e1a1fe9fd5e7 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -320,34 +320,37 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB,
   Register Hi =
       TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd);
 
-  assert(MBBI->hasOneMemOperand() && "Expected mem operand");
-  MachineMemOperand *OldMMO = MBBI->memoperands().front();
-  MachineFunction *MF = MBB.getParent();
-  MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 4);
-  MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 4, 4);
-
-  BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
-      .addReg(Lo, getKillRegState(MBBI->getOperand(0).isKill()))
-      .addReg(MBBI->getOperand(1).getReg())
-      .add(MBBI->getOperand(2))
-      .setMemRefs(MMOLo);
+  auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
+                   .addReg(Lo, getKillRegState(MBBI->getOperand(0).isKill()))
+                   .addReg(MBBI->getOperand(1).getReg())
+                   .add(MBBI->getOperand(2));
 
+  MachineInstrBuilder MIBHi;
   if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
     assert(MBBI->getOperand(2).getOffset() % 8 == 0);
     MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4);
-    BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
-        .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill()))
-        .add(MBBI->getOperand(1))
-        .add(MBBI->getOperand(2))
-        .setMemRefs(MMOHi);
+    MIBHi = BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
+                .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill()))
+                .add(MBBI->getOperand(1))
+                .add(MBBI->getOperand(2));
   } else {
     assert(isInt<12>(MBBI->getOperand(2).getImm() + 4));
-    BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
-        .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill()))
-        .add(MBBI->getOperand(1))
-        .addImm(MBBI->getOperand(2).getImm() + 4)
-        .setMemRefs(MMOHi);
+    MIBHi = BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
+                .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill()))
+                .add(MBBI->getOperand(1))
+                .addImm(MBBI->getOperand(2).getImm() + 4);
+  }
+
+  if (!MBBI->memoperands_empty()) {
+    assert(MBBI->hasOneMemOperand() && "Expected mem operand");
+    MachineMemOperand *OldMMO = MBBI->memoperands().front();
+    MachineFunction *MF = MBB.getParent();
+    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 4);
+    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 4, 4);
+    MIBLo.setMemRefs(MMOLo);
+    MIBHi.setMemRefs(MMOHi);
   }
+
   MBBI->eraseFromParent();
   return true;
 }
@@ -364,46 +367,48 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB,
   Register Hi =
       TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd);
 
-  assert(MBBI->hasOneMemOperand() && "Expected mem operand");
-  MachineMemOperand *OldMMO = MBBI->memoperands().front();
-  MachineFunction *MF = MBB.getParent();
-  MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 4);
-  MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 4, 4);
+  MachineInstrBuilder MIBLo, MIBHi;
 
   // If the register of operand 1 is equal to the Lo register, then swap the
   // order of loading the Lo and Hi statements.
   bool IsOp1EqualToLo = Lo == MBBI->getOperand(1).getReg();
   // Order: Lo, Hi
   if (!IsOp1EqualToLo) {
-    BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo)
-        .addReg(MBBI->getOperand(1).getReg())
-        .add(MBBI->getOperand(2))
-        .setMemRefs(MMOLo);
+    MIBLo = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo)
+                .addReg(MBBI->getOperand(1).getReg())
+                .add(MBBI->getOperand(2));
   }
 
   if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
     auto Offset = MBBI->getOperand(2).getOffset();
     assert(Offset % 8 == 0);
     MBBI->getOperand(2).setOffset(Offset + 4);
-    BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi)
-        .addReg(MBBI->getOperand(1).getReg())
-        .add(MBBI->getOperand(2))
-        .setMemRefs(MMOHi);
+    MIBHi = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi)
+                .addReg(MBBI->getOperand(1).getReg())
+                .add(MBBI->getOperand(2));
     MBBI->getOperand(2).setOffset(Offset);
   } else {
     assert(isInt<12>(MBBI->getOperand(2).getImm() + 4));
-    BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi)
-        .addReg(MBBI->getOperand(1).getReg())
-        .addImm(MBBI->getOperand(2).getImm() + 4)
-        .setMemRefs(MMOHi);
+    MIBHi = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi)
+                .addReg(MBBI->getOperand(1).getReg())
+                .addImm(MBBI->getOperand(2).getImm() + 4);
   }
 
   // Order: Hi, Lo
   if (IsOp1EqualToLo) {
-    BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo)
-        .addReg(MBBI->getOperand(1).getReg())
-        .add(MBBI->getOperand(2))
-        .setMemRefs(MMOLo);
+    MIBLo = BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo)
+                .addReg(MBBI->getOperand(1).getReg())
+                .add(MBBI->getOperand(2));
+  }
+
+  if (!MBBI->memoperands_empty()) {
+    assert(MBBI->hasOneMemOperand() && "Expected mem operand");
+    MachineMemOperand *OldMMO = MBBI->memoperands().front();
+    MachineFunction *MF = MBB.getParent();
+    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 4);
+    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 4, 4);
+    MIBLo.setMemRefs(MMOLo);
+    MIBHi.setMemRefs(MMOHi);
   }
 
   MBBI->eraseFromParent();
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 7ccce2e1c70b226..1e4bf1b8830bcc7 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -733,7 +733,8 @@ def HasStdExtZfhOrZvfh
 
 def FeatureStdExtZvkb
     : RISCVExtension<"zvkb", 1, 0,
-                     "'Zvkb' (Vector Bit-manipulation used in Cryptography)">,
+                     "'Zvkb' (Vector Bit-manipulation used in Cryptography)",
+                     [FeatureStdExtZve32x]>,
       RISCVExtensionBitmask<0, 52>;
 def HasStdExtZvkb : Predicate<"Subtarget->hasStdExtZvkb()">,
                     AssemblerPredicate<(all_of FeatureStdExtZvkb),
@@ -750,7 +751,8 @@ def HasStdExtZvbb : Predicate<"Subtarget->hasStdExtZvbb()">,
 
 def FeatureStdExtZvbc
     : RISCVExtension<"zvbc", 1, 0,
-                     "'Zvbc' (Vector Carryless Multiplication)">,
+                     "'Zvbc' (Vector Carryless Multiplication)",
+                     [FeatureStdExtZve64x]>,
       RISCVExtensionBitmask<0, 49>;
 def HasStdExtZvbc : Predicate<"Subtarget->hasStdExtZvbc()">,
                     AssemblerPredicate<(all_of FeatureStdExtZvbc),
@@ -758,7 +760,8 @@ def HasStdExtZvbc : Predicate<"Subtarget->hasStdExtZvbc()">,
 
 def FeatureStdExtZvbc32e
     : RISCVExperimentalExtension<"zvbc32e", 0, 7,
-                                 "'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements)">;
+                                 "'Zvbc32e' (Vector Carryless Multiplication with 32-bits elements)",
+                                 [FeatureStdExtZve32x]>;
 
 def HasStdExtZvbcOrZvbc32e : Predicate<"Subtarget->hasStdExtZvbc() || Subtarget->hasStdExtZvbc32e()">,
                              AssemblerPredicate<(any_of FeatureStdExtZvbc, FeatureStdExtZvbc32e),
@@ -766,7 +769,8 @@ def HasStdExtZvbcOrZvbc32e : Predicate<"Subtarget->hasStdExtZvbc() || Subtarget-
 
 def FeatureStdExtZvkg
     : RISCVExtension<"zvkg", 1, 0,
-                     "'Zvkg' (Vector GCM instructions for Cryptography)">,
+                     "'Zvkg' (Vector GCM instructions for Cryptography)",
+                     [FeatureStdExtZve32x]>,
       RISCVExtensionBitmask<0, 53>;
 def HasStdExtZvkg : Predicate<"Subtarget->hasStdExtZvkg()">,
                     AssemblerPredicate<(all_of FeatureStdExtZvkg),
@@ -782,7 +786,8 @@ def HasStdExtZvkgs : Predicate<"Subtarget->hasStdExtZvkgs()">,
 
 def FeatureStdExtZvkned
     : RISCVExtension<"zvkned", 1, 0,
-                     "'Zvkned' (Vector AES Encryption & Decryption (Single Round))">,
+                     "'Zvkned' (Vector AES Encryption & Decryption (Single Round))",
+                     [FeatureStdExtZve32x]>,
       RISCVExtensionBitmask<0, 54>;
 def HasStdExtZvkned : Predicate<"Subtarget->hasStdExtZvkned()">,
                       AssemblerPredicate<(all_of FeatureStdExtZvkned),
@@ -790,7 +795,8 @@ def HasStdExtZvkned : Predicate<"Subtarget->hasStdExtZvkned()">,
 
 def FeatureStdExtZvknha
     : RISCVExtension<"zvknha", 1, 0,
-                     "'Zvknha' (Vector SHA-2 (SHA-256 only))">,
+                     "'Zvknha' (Vector SHA-2 (SHA-256 only))",
+                     [FeatureStdExtZve32x]>,
       RISCVExtensionBitmask<0, 55>;
 def HasStdExtZvknha : Predicate<"Subtarget->hasStdExtZvknha()">,
                       AssemblerPredicate<(all_of FeatureStdExtZvknha),
@@ -798,7 +804,8 @@ def HasStdExtZvknha : Predicate<"Subtarget->hasStdExtZvknha()">,
 
 def FeatureStdExtZvknhb
     : RISCVExtension<"zvknhb", 1, 0,
-                     "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))">,
+                     "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))",
+                     [FeatureStdExtZve64x]>,
       RISCVExtensionBitmask<0, 56>;
 def HasStdExtZvknhb : Predicate<"Subtarget->hasStdExtZvknhb()">,
                       AssemblerPredicate<(all_of FeatureStdExtZvknhb),
@@ -810,7 +817,8 @@ def HasStdExtZvknhaOrZvknhb : Predicate<"Subtarget->hasStdExtZvknha() || Subtarg
 
 def FeatureStdExtZvksed
     : RISCVExtension<"zvksed", 1, 0,
-                     "'Zvksed' (SM4 Block Cipher Instructions)">,
+                     "'Zvksed' (SM4 Block Cipher Instructions)",
+                     [FeatureStdExtZve32x]>,
       RISCVExtensionBitmask<0, 57>;
 def HasStdExtZvksed : Predicate<"Subtarget->hasStdExtZvksed()">,
                       AssemblerPredicate<(all_of FeatureStdExtZvksed),
@@ -818,7 +826,8 @@ def HasStdExtZvksed : Predicate<"Subtarget->hasStdExtZvksed()">,
 
 def FeatureStdExtZvksh
     : RISCVExtension<"zvksh", 1, 0,
-                     "'Zvksh' (SM3 Hash Function Instructions)">,
+                     "'Zvksh' (SM3 Hash Function Instructions)",
+                     [FeatureStdExtZve32x]>,
       RISCVExtensionBitmask<0, 58>;
 def HasStdExtZvksh : Predicate<"Subtarget->hasStdExtZvksh()">,
                      AssemblerPredicate<(all_of FeatureStdExtZvksh),
@@ -1029,39 +1038,50 @@ def FeatureStdExtSvpbmt
     : RISCVExtension<"svpbmt", 1, 0,
                      "'Svpbmt' (Page-Based Memory Types)">;
 
+def FeatureStdExtSvvptc
+    : RISCVExtension<"svvptc", 1, 0,
+                     "'svvptc' (Obviating Memory-Management Instructions after Marking PTEs Valid)">;
+
+def FeatureStdExtSha
+    : RISCVExtension<"sha", 1, 0,
+                     "'Sha' (Augmented Hypervisor)",
+                     [FeatureStdExtH, FeatureStdExtSsstateen, FeatureStdExtShcounterenw,
+                      FeatureStdExtShvstvala, FeatureStdExtShtvala, FeatureStdExtShvstvecd,
+                      FeatureStdExtShvsatpa, FeatureStdExtShgatpa]>;
+
 // Pointer Masking extensions
 
 // A supervisor-level extension that provides pointer masking for the next lower
 // privilege mode (U-mode), and for VS- and VU-modes if the H extension is
 // present.
 def FeatureStdExtSsnpm
-    : RISCVExperimentalExtension<"ssnpm", 1, 0,
-                                 "'Ssnpm' (Supervisor-level Pointer Masking for next lower privilege mode)">;
+    : RISCVExtension<"ssnpm", 1, 0,
+                     "'Ssnpm' (Supervisor-level Pointer Masking for next lower privilege mode)">;
 
 // A machine-level extension that provides pointer masking for the next lower
 // privilege mode (S/HS if S-mode is implemented, or U-mode otherwise).
 def FeatureStdExtSmnpm
-    : RISCVExperimentalExtension<"smnpm", 1, 0,
-                                 "'Smnpm' (Machine-level Pointer Masking for next lower privilege mode)">;
+    : RISCVExtension<"smnpm", 1, 0,
+                     "'Smnpm' (Machine-level Pointer Masking for next lower privilege mode)">;
 
 // A machine-level extension that provides pointer masking for M-mode.
 def FeatureStdExtSmmpm
-    : RISCVExperimentalExtension<"smmpm", 1, 0,
-                                 "'Smmpm' (Machine-level Pointer Masking for M-mode)">;
+    : RISCVExtension<"smmpm", 1, 0,
+                     "'Smmpm' (Machine-level Pointer Masking for M-mode)">;
 
 // An extension that indicates that there is pointer-masking support available
 // in supervisor mode, with some facility provided in the supervisor execution
 // environment to control pointer masking.
 def FeatureStdExtSspm
-    : RISCVExperimentalExtension<"sspm", 1, 0,
-                                 "'Sspm' (Indicates Supervisor-mode Pointer Masking)">;
+    : RISCVExtension<"sspm", 1, 0,
+                     "'Sspm' (Indicates Supervisor-mode Pointer Masking)">;
 
 // An extension that indicates that there is pointer-masking support available
 // in user mode, with some facility provided in the application execution
 // environment to control pointer masking.
 def FeatureStdExtSupm
-    : RISCVExperimentalExtension<"supm", 1, 0,
-                                 "'Supm' (Indicates User-mode Pointer Masking)">;
+    : RISCVExtension<"supm", 1, 0,
+                     "'Supm' (Indicates User-mode Pointer Masking)">;
 
 def FeatureStdExtSmctr
     : RISCVExperimentalExtension<"smctr", 1, 0,
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index b49cbab1876d79a..f5851f371545191 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -42,10 +42,19 @@ RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI)
           /*TransientStackAlignment=*/getABIStackAlignment(STI.getTargetABI())),
       STI(STI) {}
 
+// The register used to hold the frame pointer.
+static constexpr Register FPReg = RISCV::X8;
+
+// The register used to hold the stack pointer.
+static constexpr Register SPReg = RISCV::X2;
+
+// The register used to hold the return address.
+static constexpr Register RAReg = RISCV::X1;
+
 // Offsets which need to be scale by XLen representing locations of CSRs which
 // are given a fixed location by save/restore libcalls or Zcmp Push/Pop.
 static const std::pair<MCPhysReg, int8_t> FixedCSRFIMap[] = {
-    {/*ra*/ RISCV::X1, -1},   {/*s0*/ RISCV::X8, -2},
+    {/*ra*/ RAReg, -1},       {/*s0*/ FPReg, -2},
     {/*s1*/ RISCV::X9, -3},   {/*s2*/ RISCV::X18, -4},
     {/*s3*/ RISCV::X19, -5},  {/*s4*/ RISCV::X20, -6},
     {/*s5*/ RISCV::X21, -7},  {/*s6*/ RISCV::X22, -8},
@@ -187,6 +196,7 @@ static int getLibCallID(const MachineFunction &MF,
   switch (MaxReg) {
   default:
     llvm_unreachable("Something has gone wrong!");
+    // clang-format off
   case /*s11*/ RISCV::X27: return 12;
   case /*s10*/ RISCV::X26: return 11;
   case /*s9*/  RISCV::X25: return 10;
@@ -198,8 +208,9 @@ static int getLibCallID(const MachineFunction &MF,
   case /*s3*/  RISCV::X19: return 4;
   case /*s2*/  RISCV::X18: return 3;
   case /*s1*/  RISCV::X9:  return 2;
-  case /*s0*/  RISCV::X8:  return 1;
-  case /*ra*/  RISCV::X1:  return 0;
+  case /*s0*/  FPReg:  return 1;
+  case /*ra*/  RAReg:  return 0;
+    // clang-format on
   }
 }
 
@@ -284,9 +295,9 @@ getPushPopEncodingAndNum(const Register MaxReg) {
     return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S2, 4);
   case RISCV::X9: /*s1*/
     return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S1, 3);
-  case RISCV::X8: /*s0*/
+  case FPReg: /*s0*/
     return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0, 2);
-  case RISCV::X1: /*ra*/
+  case RAReg: /*ra*/
     return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA, 1);
   }
 }
@@ -372,12 +383,6 @@ uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding(
   return alignTo(MFI.getStackSize() + RVFI->getRVVPadding(), getStackAlign());
 }
 
-// Returns the register used to hold the frame pointer.
-static Register getFPReg(const RISCVSubtarget &STI) { return RISCV::X8; }
-
-// Returns the register used to hold the stack pointer.
-static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; }
-
 static SmallVector<CalleeSavedInfo, 8>
 getUnmanagedCSI(const MachineFunction &MF,
                 const std::vector<CalleeSavedInfo> &CSI) {
@@ -415,8 +420,6 @@ void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF,
                                            MachineInstr::MIFlag Flag) const {
   assert(Amount != 0 && "Did not need to adjust stack pointer for RVV.");
 
-  const Register SPReg = getSPReg(STI);
-
   // Optimize compile time offset case
   StackOffset Offset = StackOffset::getScalable(Amount);
   if (auto VLEN = STI.getRealVLen()) {
@@ -479,7 +482,7 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
   unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
   Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
   Expr.push_back(0);
-  if (Reg == RISCV::X2)
+  if (Reg == SPReg)
     Comment << "sp";
   else
     Comment << printReg(Reg, &TRI);
@@ -530,8 +533,6 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
   const RISCVInstrInfo *TII = STI.getInstrInfo();
   MachineBasicBlock::iterator MBBI = MBB.begin();
 
-  Register FPReg = getFPReg(STI);
-  Register SPReg = getSPReg(STI);
   Register BPReg = RISCVABI::getBPReg();
 
   // Debug location must be unknown since the first debug location is used
@@ -762,8 +763,6 @@ void RISCVFrameLowering::deallocateStack(MachineFunction &MF,
                                          int64_t CFAOffset) const {
   const RISCVRegisterInfo *RI = STI.getRegisterInfo();
 
-  Register SPReg = getSPReg(STI);
-
   RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
                 MachineInstr::FrameDestroy, getStackAlign());
 }
@@ -773,8 +772,6 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
   const RISCVRegisterInfo *RI = STI.getRegisterInfo();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
-  Register FPReg = getFPReg(STI);
-  Register SPReg = getSPReg(STI);
 
   // All calls are tail calls in GHC calling conv, and functions have no
   // prologue/epilogue.
@@ -922,7 +919,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
   }
 
   if (FI >= MinCSFI && FI <= MaxCSFI) {
-    FrameReg = RISCV::X2;
+    FrameReg = SPReg;
 
     if (FirstSPAdjustAmount)
       Offset += StackOffset::getFixed(FirstSPAdjustAmount);
@@ -969,13 +966,13 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
     } else {
       // VarSize objects must be empty in this case!
       assert(!MFI.hasVarSizedObjects());
-      FrameReg = RISCV::X2;
+      FrameReg = SPReg;
     }
   } else {
     FrameReg = RI->getFrameRegister(MF);
   }
 
-  if (FrameReg == getFPReg(STI)) {
+  if (FrameReg == FPReg) {
     Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize());
     // When using FP to access scalable vector objects, we need to minus
     // the frame size.
@@ -1067,8 +1064,8 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
   // Unconditionally spill RA and FP only if the function uses a frame
   // pointer.
   if (hasFP(MF)) {
-    SavedRegs.set(RISCV::X1);
-    SavedRegs.set(RISCV::X8);
+    SavedRegs.set(RAReg);
+    SavedRegs.set(FPReg);
   }
   // Mark BP as used if function has dedicated base pointer.
   if (hasBP(MF))
@@ -1133,23 +1130,23 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFunction &MF) const {
 
   uint64_t StackSize = Offset;
 
-  // Multiply by vscale.
-  if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock)
-    StackSize *= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock;
-
   // Ensure the alignment of the RVV stack. Since we want the most-aligned
   // object right at the bottom (i.e., any padding at the top of the frame),
   // readjust all RVV objects down by the alignment padding.
-  if (auto AlignmentPadding = offsetToAlignment(StackSize, RVVStackAlign)) {
-    StackSize += AlignmentPadding;
-    for (int FI : ObjectsToAllocate)
-      MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
+  // Stack size and offsets are in multiples of vscale, while stack alignment
+  // is in bytes. Dividing the stack alignment by the minimum vscale gives the
+  // maximum stack alignment expressed as a multiple of vscale.
+  auto VScale =
+      std::max<uint64_t>(ST.getRealMinVLen() / RISCV::RVVBitsPerBlock, 1);
+  if (auto RVVStackAlignVScale = RVVStackAlign.value() / VScale) {
+    if (auto AlignmentPadding =
+            offsetToAlignment(StackSize, Align(RVVStackAlignVScale))) {
+      StackSize += AlignmentPadding;
+      for (int FI : ObjectsToAllocate)
+        MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) - AlignmentPadding);
+    }
   }
 
-  // Remove vscale.
-  if (ST.getRealMinVLen() >= RISCV::RVVBitsPerBlock)
-    StackSize /= ST.getRealMinVLen() / RISCV::RVVBitsPerBlock;
-
   return std::make_pair(StackSize, RVVStackAlign);
 }
 
@@ -1328,7 +1325,6 @@ bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
 MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr(
     MachineFunction &MF, MachineBasicBlock &MBB,
     MachineBasicBlock::iterator MI) const {
-  Register SPReg = RISCV::X2;
   DebugLoc DL = MI->getDebugLoc();
 
   if (!hasReservedCallFrame(MF)) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 69112d868bff827..af7a39b2580a372 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1081,7 +1081,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
                           ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
                           ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
-                          ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE},
+                          ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
+                          ISD::VECTOR_COMPRESS},
                          VT, Custom);
       MVT EltVT = VT.getVectorElementType();
       if (isTypeLegal(EltVT))
@@ -1333,7 +1334,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setOperationAction(ISD::UNDEF, VT, Custom);
 
         setOperationAction({ISD::CONCAT_VECTORS, ISD::VECTOR_REVERSE,
-                            ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
+                            ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+                            ISD::VECTOR_COMPRESS},
                            VT, Custom);
 
         // FIXME: mload, mstore, mgather, mscatter, vp_load/store,
@@ -1440,8 +1442,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
              ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
              ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
             VT, Custom);
-
-        setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
       }
 
       // Custom-legalize bitcasts from fixed-length vectors to scalar types.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 3989a966edfd333..d5b086861d71e61 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -179,17 +179,10 @@ bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
   case RISCV::VMV_S_X:
   case RISCV::VFMV_S_F:
   case RISCV::VID_V:
-    if (MI.getOperand(1).isUndef() &&
-        /* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl
-           and vtype.  Make sure we only rematerialize before RISCVInsertVSETVLI
-           i.e. -riscv-vsetvl-after-rvv-regalloc=true */
-        !MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
-      return true;
-    break;
+    return MI.getOperand(1).isUndef();
   default:
-    break;
+    return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
   }
-  return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
 }
 
 static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
@@ -2543,6 +2536,21 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
         case RISCVOp::OPERAND_SPIMM:
           Ok = (Imm & 0xf) == 0;
           break;
+        case RISCVOp::OPERAND_FRMARG:
+          Ok = RISCVFPRndMode::isValidRoundingMode(Imm);
+          break;
+        case RISCVOp::OPERAND_RTZARG:
+          Ok = Imm == RISCVFPRndMode::RTZ;
+          break;
+        case RISCVOp::OPERAND_COND_CODE:
+          Ok = Imm >= 0 && Imm < RISCVCC::COND_INVALID;
+          break;
+        case RISCVOp::OPERAND_VEC_POLICY:
+          Ok = (Imm & (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) == Imm;
+          break;
+        case RISCVOp::OPERAND_SEW:
+          Ok = Imm == 0 || (Imm >= 3 && Imm <= 6);
+          break;
         }
         if (!Ok) {
           ErrInfo = "Invalid immediate";
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 86cc638fd04ac2e..a867368235584c0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -387,6 +387,11 @@ def csr_sysreg : RISCVOp, TImmLeaf<XLenVT, "return isUInt<12>(Imm);"> {
 // A parameterized register class alternative to i32imm/i64imm from Target.td.
 def ixlenimm : Operand<XLenVT>;
 
+// Condition code used by select and short forward branch pseudos.
+def cond_code : RISCVOp {
+  let OperandType = "OPERAND_COND_CODE";
+}
+
 def ixlenimm_li : Operand<XLenVT> {
   let ParserMatchClass = ImmXLenAsmOperand<"", "LI">;
 }
@@ -1450,7 +1455,7 @@ def riscv_selectcc_frag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc,
 multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt> {
   let usesCustomInserter = 1 in
   def _Using_CC_GPR : Pseudo<(outs valty:$dst),
-                             (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                             (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                               valty:$truev, valty:$falsev),
                              [(set valty:$dst,
                                (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), GPR:$rhs, cond,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index a134f37c774954b..da3f207a2faf728 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -134,6 +134,8 @@ def frmarg : Operand<XLenVT> {
   let ParserMatchClass = FRMArg;
   let PrintMethod = "printFRMArg";
   let DecoderMethod = "decodeFRMArg";
+  let OperandType = "OPERAND_FRMARG";
+  let OperandNamespace = "RISCVOp";
 }
 
 // Variants of the rounding mode operand that default to 'rne'. This is used
@@ -154,6 +156,8 @@ def frmarglegacy : Operand<XLenVT> {
   let ParserMatchClass = FRMArgLegacy;
   let PrintMethod = "printFRMArgLegacy";
   let DecoderMethod = "decodeFRMArg";
+  let OperandType = "OPERAND_FRMARG";
+  let OperandNamespace = "RISCVOp";
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
index f25dc7302608baf..16cc0e5a61f0bc3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
@@ -15,7 +15,7 @@ let Predicates = [HasShortForwardBranchOpt], isSelect = 1,
 // This instruction moves $truev to $dst when the condition is true. It will
 // be expanded to control flow in RISCVExpandPseudoInsts.
 def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst),
-                            (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                            (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                              GPR:$falsev, GPR:$truev),
                             [(set GPR:$dst,
                               (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs),
@@ -34,7 +34,7 @@ let Predicates = [HasConditionalMoveFusion, NoShortForwardBranchOpt],
 // be expanded to control flow in RISCVExpandPseudoInsts.
 // We use GPRNoX0 because c.mv cannot encode X0.
 def PseudoCCMOVGPRNoX0 : Pseudo<(outs GPRNoX0:$dst),
-                                (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                                (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                                  GPRNoX0:$falsev, GPRNoX0:$truev),
                                 [(set GPRNoX0:$dst,
                                   (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs),
@@ -51,143 +51,143 @@ def PseudoCCMOVGPRNoX0 : Pseudo<(outs GPRNoX0:$dst),
 let Predicates = [HasShortForwardBranchOpt], hasSideEffects = 0,
     mayLoad = 0, mayStore = 0, Size = 8, Constraints = "$dst = $falsev" in {
 def PseudoCCADD : Pseudo<(outs GPR:$dst),
-                         (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                         (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                           GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                   Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
                          ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
 def PseudoCCSUB : Pseudo<(outs GPR:$dst),
-                         (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                         (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                           GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                   Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
                          ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
 def PseudoCCSLL : Pseudo<(outs GPR:$dst),
-                         (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                         (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                           GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                   Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                          ReadSFBALU, ReadSFBALU]>;
 def PseudoCCSRL : Pseudo<(outs GPR:$dst),
-                         (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                         (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                           GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                   Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                          ReadSFBALU, ReadSFBALU]>;
 def PseudoCCSRA : Pseudo<(outs GPR:$dst),
-                         (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                         (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                           GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                   Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                          ReadSFBALU, ReadSFBALU]>;
 def PseudoCCAND : Pseudo<(outs GPR:$dst),
-                         (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                         (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                           GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                   Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
                          ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
 def PseudoCCOR  : Pseudo<(outs GPR:$dst),
-                         (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                         (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                           GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                   Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
                          ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
 def PseudoCCXOR : Pseudo<(outs GPR:$dst),
-                         (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                         (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                           GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                   Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
                          ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
 
 def PseudoCCADDI : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                           ReadSFBALU]>;
 def PseudoCCSLLI : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                           ReadSFBALU]>;
 def PseudoCCSRLI : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                           ReadSFBALU]>;
 def PseudoCCSRAI : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                           ReadSFBALU]>;
 def PseudoCCANDI : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                           ReadSFBALU]>;
 def PseudoCCORI  : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                           ReadSFBALU]>;
 def PseudoCCXORI : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                           ReadSFBALU]>;
 
 // RV64I instructions
 def PseudoCCADDW : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
                           ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
 def PseudoCCSUBW : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
                           ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
 def PseudoCCSLLW : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                           ReadSFBALU, ReadSFBALU]>;
 def PseudoCCSRLW : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                           ReadSFBALU, ReadSFBALU]>;
 def PseudoCCSRAW : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                           ReadSFBALU, ReadSFBALU]>;
 
 def PseudoCCADDIW : Pseudo<(outs GPR:$dst),
-                           (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                           (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                             GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
                     Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                            ReadSFBALU]>;
 def PseudoCCSLLIW : Pseudo<(outs GPR:$dst),
-                           (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                           (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                             GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
                     Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                            ReadSFBALU]>;
 def PseudoCCSRLIW : Pseudo<(outs GPR:$dst),
-                           (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                           (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                             GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
                     Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                            ReadSFBALU]>;
 def PseudoCCSRAIW : Pseudo<(outs GPR:$dst),
-                           (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                           (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                             GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
                     Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
                            ReadSFBALU]>;
 
 // Zbb/Zbkb instructions
 def PseudoCCANDN : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
                           ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
 def PseudoCCORN : Pseudo<(outs GPR:$dst),
-                         (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                         (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                           GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                   Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
                          ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
 def PseudoCCXNOR : Pseudo<(outs GPR:$dst),
-                          (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
+                          (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
                            GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
                    Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
                           ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 6f7d14d5503bd3b..8e0c4826ac00de4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -104,13 +104,28 @@ class SchedCommon<list<SchedWrite> writes, list<SchedRead> reads,
                   string mx = "WorstCase", int sew = 0, bit forceMasked = 0,
                   bit forcePassthruRead = 0> : Sched<[]> {
   defvar isMasked = !ne(!find(NAME, "_MASK"), -1);
+  defvar isTied = !ne(!find(NAME, "_TIED"), -1);
   defvar isMaskedOrForceMasked = !or(forceMasked, isMasked);
+  defvar isTiedMasked = !and(isMaskedOrForceMasked, isTied);
   defvar passthruRead = !if(!or(!eq(mx, "WorstCase"), !eq(sew, 0)),
                             !cast<SchedRead>("ReadVPassthru_" # mx),
                             !cast<SchedRead>("ReadVPassthru_" # mx # "_E" #sew));
-  defvar needsPassthruRead = !or(isMaskedOrForceMasked, forcePassthruRead);
+  // We don't need passthru operand if it's already _TIED without mask.
+  defvar needsForcePassthruRead = !and(forcePassthruRead, !not(isTied));
+  defvar needsPassthruRead = !or(isMaskedOrForceMasked, needsForcePassthruRead);
+  // If this is a _TIED + masked operation, $rs2 (i.e. the first operand) is
+  // merged with the mask.
+  // NOTE: The nested `!if` below is deliberate. Had we written the more
+  // natural form
+  // `!if(!and(!not(!empty(reads)), isTiedMasked), !tail(reads), reads)`,
+  // then, because `!if` has no proper short-circuit behavior, whenever the
+  // condition of that `!if` cannot be resolved right away, `!tail(reads)`
+  // would be evaluated immediately even when `reads` is empty, which leads
+  // to an assertion failure.
+  defvar readsWithTiedMask =
+      !if(isTiedMasked, !if(!not(!empty(reads)), !tail(reads), reads), reads);
   defvar readsWithMask =
-      !if(isMaskedOrForceMasked, !listconcat(reads, [ReadVMask]), reads);
+      !if(isMaskedOrForceMasked, !listconcat(readsWithTiedMask, [ReadVMask]), reads);
   defvar allReads =
       !if(needsPassthruRead, !listconcat([passthruRead], readsWithMask), reads);
   let SchedRW = !listconcat(writes, allReads);
@@ -630,31 +645,37 @@ multiclass VMRG_IV_V_X_I<string opcodestr, bits<6> funct6> {
 }
 
 multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> {
+  // If the LSB of funct6 is 1, this is a mask-producing instruction, which
+  // uses a different scheduling class.
+  defvar WritePrefix = !if(funct6{0}, "WriteVICALUM", "WriteVICALU");
   def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
-           SchedBinaryMC<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV">;
+           SchedBinaryMC<WritePrefix#"V", "ReadVICALUV", "ReadVICALUV">;
   def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
-           SchedBinaryMC<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX">;
+           SchedBinaryMC<WritePrefix#"X", "ReadVICALUV", "ReadVICALUX">;
 }
 
 multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6>
     : VALUm_IV_V_X<opcodestr, funct6> {
+  // If the LSB of funct6 is 1, this is a mask-producing instruction, which
+  // uses a different scheduling class.
+  defvar WriteSched = !if(funct6{0}, "WriteVICALUMI", "WriteVICALUI");
   def IM : VALUmVI<funct6, opcodestr # ".vim">,
-           SchedUnaryMC<"WriteVICALUI", "ReadVICALUV">;
+           SchedUnaryMC<WriteSched, "ReadVICALUV">;
 }
 
 multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> {
   def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">,
-          SchedBinaryMC<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV",
+          SchedBinaryMC<"WriteVICALUMV", "ReadVICALUV", "ReadVICALUV",
                         forceMasked=0>;
   def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">,
-          SchedBinaryMC<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX",
+          SchedBinaryMC<"WriteVICALUMX", "ReadVICALUV", "ReadVICALUX",
                         forceMasked=0>;
 }
 
 multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6>
    : VALUNoVm_IV_V_X<opcodestr, funct6> {
   def I : VALUVINoVm<funct6, opcodestr # ".vi">,
-          SchedUnaryMC<"WriteVICALUI", "ReadVICALUV", forceMasked=0>;
+          SchedUnaryMC<"WriteVICALUMI", "ReadVICALUV", forceMasked=0>;
 }
 
 multiclass VALU_FV_F<string opcodestr, bits<6> funct6> {
@@ -1142,13 +1163,11 @@ defm VSEXT_VF2 : VALU_MV_VS2<"vsext.vf2", 0b010010, 0b00111>;
 
 // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
 defm VADC_V : VALUm_IV_V_X_I<"vadc", 0b010000>;
-let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
-defm VMADC_V : VALUm_IV_V_X_I<"vmadc", 0b010001>;
-defm VMADC_V : VALUNoVm_IV_V_X_I<"vmadc", 0b010001>;
-} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
 defm VSBC_V : VALUm_IV_V_X<"vsbc", 0b010010>;
 let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint,
     DestEEW = EEW1 in {
+defm VMADC_V : VALUm_IV_V_X_I<"vmadc", 0b010001>;
+defm VMADC_V : VALUNoVm_IV_V_X_I<"vmadc", 0b010001>;
 defm VMSBC_V : VALUm_IV_V_X<"vmsbc", 0b010011>;
 defm VMSBC_V : VALUNoVm_IV_V_X<"vmsbc", 0b010011>;
 } // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint, DestEEW = EEW1
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 6b308bc8c9aa0fe..6ffdae1d7df2aed 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -84,6 +84,14 @@ def AVL : RegisterOperand<GPRNoX0> {
   let OperandType = "OPERAND_AVL";
 }
 
+def vec_policy : RISCVOp {
+  let OperandType = "OPERAND_VEC_POLICY";
+}
+
+def sew : RISCVOp {
+  let OperandType = "OPERAND_SEW";
+}
+
 // X0 has special meaning for vsetvl/vsetvli.
 //  rd | rs1 |   AVL value | Effect on vl
 //--------------------------------------------------------------
@@ -764,8 +772,8 @@ class GetVTypePredicates<VTypeInfo vti> {
 class VPseudoUSLoadNoMask<VReg RetClass,
                           int EEW> :
       Pseudo<(outs RetClass:$rd),
-             (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew,
-                  ixlenimm:$policy), []>,
+             (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, sew:$sew,
+                  vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
   let mayLoad = 1;
@@ -782,7 +790,7 @@ class VPseudoUSLoadMask<VReg RetClass,
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru,
                   GPRMem:$rs1,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
   let mayLoad = 1;
@@ -799,7 +807,7 @@ class VPseudoUSLoadFFNoMask<VReg RetClass,
                             int EEW> :
       Pseudo<(outs RetClass:$rd, GPR:$vl),
              (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl,
-                  ixlenimm:$sew, ixlenimm:$policy), []>,
+                  sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
   let mayLoad = 1;
@@ -816,7 +824,7 @@ class VPseudoUSLoadFFMask<VReg RetClass,
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl),
              (ins GetVRegNoV0<RetClass>.R:$passthru,
                   GPRMem:$rs1,
-                  VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  VMaskOp:$vm, AVL:$avl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
   let mayLoad = 1;
@@ -833,7 +841,7 @@ class VPseudoSLoadNoMask<VReg RetClass,
                          int EEW> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$dest, GPRMem:$rs1, GPR:$rs2, AVL:$vl,
-                  ixlenimm:$sew, ixlenimm:$policy), []>,
+                  sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLE</*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
   let mayLoad = 1;
@@ -850,7 +858,7 @@ class VPseudoSLoadMask<VReg RetClass,
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru,
                   GPRMem:$rs1, GPR:$rs2,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLE</*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
   let mayLoad = 1;
@@ -872,7 +880,7 @@ class VPseudoILoadNoMask<VReg RetClass,
                          int TargetConstraintType = 1> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$dest, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl,
-                  ixlenimm:$sew, ixlenimm:$policy), []>,
+                  sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
   let mayLoad = 1;
@@ -895,7 +903,7 @@ class VPseudoILoadMask<VReg RetClass,
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru,
                   GPRMem:$rs1, IdxClass:$rs2,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
   let mayLoad = 1;
@@ -912,7 +920,7 @@ class VPseudoILoadMask<VReg RetClass,
 class VPseudoUSStoreNoMask<VReg StClass,
                            int EEW> :
       Pseudo<(outs),
-             (ins StClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+             (ins StClass:$rd, GPRMem:$rs1, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo,
       RISCVVSE</*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> {
   let mayLoad = 0;
@@ -926,7 +934,7 @@ class VPseudoUSStoreMask<VReg StClass,
                          int EEW> :
       Pseudo<(outs),
              (ins StClass:$rd, GPRMem:$rs1,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo,
       RISCVVSE</*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> {
   let mayLoad = 0;
@@ -940,7 +948,7 @@ class VPseudoSStoreNoMask<VReg StClass,
                           int EEW> :
       Pseudo<(outs),
              (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2,
-                  AVL:$vl, ixlenimm:$sew), []>,
+                  AVL:$vl, sew:$sew), []>,
       RISCVVPseudo,
       RISCVVSE</*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> {
   let mayLoad = 0;
@@ -954,7 +962,7 @@ class VPseudoSStoreMask<VReg StClass,
                         int EEW> :
       Pseudo<(outs),
              (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo,
       RISCVVSE</*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> {
   let mayLoad = 0;
@@ -967,7 +975,7 @@ class VPseudoSStoreMask<VReg StClass,
 class VPseudoNullaryNoMask<VReg RegClass> :
       Pseudo<(outs RegClass:$rd),
              (ins RegClass:$passthru,
-                  AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -981,7 +989,7 @@ class VPseudoNullaryNoMask<VReg RegClass> :
 class VPseudoNullaryMask<VReg RegClass> :
       Pseudo<(outs GetVRegNoV0<RegClass>.R:$rd),
              (ins GetVRegNoV0<RegClass>.R:$passthru,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -996,7 +1004,7 @@ class VPseudoNullaryMask<VReg RegClass> :
 // Nullary for pseudo instructions. They are expanded in
 // RISCVExpandPseudoInsts pass.
 class VPseudoNullaryPseudoM<string BaseInst> :
-      Pseudo<(outs VR:$rd), (ins AVL:$vl, ixlenimm:$sew), []>,
+      Pseudo<(outs VR:$rd), (ins AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1016,7 +1024,7 @@ class VPseudoUnaryNoMask<DAGOperand RetClass,
                          int TargetConstraintType = 1> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$passthru, OpClass:$rs2,
-                  AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1033,7 +1041,7 @@ class VPseudoUnaryNoMaskNoPolicy<DAGOperand RetClass,
                                  string Constraint = "",
                                  int TargetConstraintType = 1> :
       Pseudo<(outs RetClass:$rd),
-             (ins OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>,
+             (ins OpClass:$rs2, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1050,7 +1058,7 @@ class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass,
                                      int TargetConstraintType = 1> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$passthru, OpClass:$rs2, ixlenimm:$rm,
-                  AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1070,7 +1078,7 @@ class VPseudoUnaryMask<VReg RetClass,
                        int TargetConstraintType = 1> :
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru, OpClass:$rs2,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1090,7 +1098,7 @@ class VPseudoUnaryMaskRoundingMode<VReg RetClass,
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru, OpClass:$rs2,
                   VMaskOp:$vm, ixlenimm:$rm,
-                  AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1110,7 +1118,7 @@ class VPseudoUnaryMask_NoExcept<VReg RetClass,
                                 string Constraint = ""> :
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru, OpClass:$rs2,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> {
+                  VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []> {
   let mayLoad = 0;
   let mayStore = 0;
   let hasSideEffects = 0;
@@ -1128,7 +1136,7 @@ class VPseudoUnaryNoMask_FRM<VReg RetClass,
                              int TargetConstraintType = 1> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$passthru, OpClass:$rs2, ixlenimm:$frm,
-                  AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1148,7 +1156,7 @@ class VPseudoUnaryMask_FRM<VReg RetClass,
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru, OpClass:$rs2,
                   VMaskOp:$vm, ixlenimm:$frm,
-                  AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1164,7 +1172,7 @@ class VPseudoUnaryMask_FRM<VReg RetClass,
 
 class VPseudoUnaryNoMaskGPROut :
       Pseudo<(outs GPR:$rd),
-             (ins VR:$rs2, AVL:$vl, ixlenimm:$sew), []>,
+             (ins VR:$rs2, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1175,7 +1183,7 @@ class VPseudoUnaryNoMaskGPROut :
 
 class VPseudoUnaryMaskGPROut :
       Pseudo<(outs GPR:$rd),
-             (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+             (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1189,7 +1197,7 @@ class VPseudoUnaryAnyMask<VReg RetClass,
                           VReg Op1Class> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$passthru, Op1Class:$rs2,
-                  VR:$vm, AVL:$vl, ixlenimm:$sew), []>,
+                  VR:$vm, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1205,7 +1213,7 @@ class VPseudoBinaryNoMask<VReg RetClass,
                           string Constraint,
                           int TargetConstraintType = 1> :
       Pseudo<(outs RetClass:$rd),
-             (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+             (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1223,7 +1231,7 @@ class VPseudoBinaryNoMaskPolicy<VReg RetClass,
                                 int TargetConstraintType = 1> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl,
-                  ixlenimm:$sew, ixlenimm:$policy), []>,
+                  sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1243,7 +1251,7 @@ class VPseudoBinaryNoMaskRoundingMode<VReg RetClass,
                                       int TargetConstraintType = 1> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm,
-                  AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1266,7 +1274,7 @@ class VPseudoBinaryMaskPolicyRoundingMode<VReg RetClass,
              (ins GetVRegNoV0<RetClass>.R:$passthru,
                   Op1Class:$rs2, Op2Class:$rs1,
                   VMaskOp:$vm, ixlenimm:$rm, AVL:$vl,
-                  ixlenimm:$sew, ixlenimm:$policy), []>,
+                  sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1288,8 +1296,8 @@ class VPseudoTiedBinaryNoMask<VReg RetClass,
                               string Constraint,
                               int TargetConstraintType = 1> :
       Pseudo<(outs RetClass:$rd),
-             (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew,
-                  ixlenimm:$policy), []>,
+             (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, sew:$sew,
+                  vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1310,8 +1318,8 @@ class VPseudoTiedBinaryNoMaskRoundingMode<VReg RetClass,
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$rs2, Op2Class:$rs1,
                   ixlenimm:$rm,
-                  AVL:$vl, ixlenimm:$sew,
-                  ixlenimm:$policy), []>,
+                  AVL:$vl, sew:$sew,
+                  vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1331,7 +1339,7 @@ class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
                           bit Ordered>:
       Pseudo<(outs),
              (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl,
-                  ixlenimm:$sew),[]>,
+                  sew:$sew),[]>,
       RISCVVPseudo,
       RISCVVSX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
   let mayLoad = 0;
@@ -1345,7 +1353,7 @@ class VPseudoIStoreMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
                         bit Ordered>:
       Pseudo<(outs),
              (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew),[]>,
       RISCVVPseudo,
       RISCVVSX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
   let mayLoad = 0;
@@ -1363,7 +1371,7 @@ class VPseudoBinaryMaskPolicy<VReg RetClass,
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru,
                   Op1Class:$rs2, Op2Class:$rs1,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1382,7 +1390,7 @@ class VPseudoTernaryMaskPolicy<VReg RetClass,
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru,
                   Op1Class:$rs2, Op2Class:$rs1,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1401,7 +1409,7 @@ class VPseudoTernaryMaskPolicyRoundingMode<VReg RetClass,
                   Op1Class:$rs2, Op2Class:$rs1,
                   VMaskOp:$vm,
                   ixlenimm:$rm,
-                  AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1423,7 +1431,7 @@ class VPseudoBinaryMOutMask<VReg RetClass,
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$passthru,
                   Op1Class:$rs2, Op2Class:$rs1,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1445,7 +1453,7 @@ class VPseudoTiedBinaryMask<VReg RetClass,
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru,
                   Op2Class:$rs1,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1468,7 +1476,7 @@ class VPseudoTiedBinaryMaskRoundingMode<VReg RetClass,
                   Op2Class:$rs1,
                   VMaskOp:$vm,
                   ixlenimm:$rm,
-                  AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1494,9 +1502,9 @@ class VPseudoBinaryCarry<VReg RetClass,
       Pseudo<(outs RetClass:$rd),
              !if(CarryIn,
                 (ins Op1Class:$rs2, Op2Class:$rs1,
-                     VMV0:$carry, AVL:$vl, ixlenimm:$sew),
+                     VMV0:$carry, AVL:$vl, sew:$sew),
                 (ins Op1Class:$rs2, Op2Class:$rs1,
-                     AVL:$vl, ixlenimm:$sew)), []>,
+                     AVL:$vl, sew:$sew)), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1515,7 +1523,7 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass,
                                int TargetConstraintType = 1> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1,
-                  VMV0:$carry, AVL:$vl, ixlenimm:$sew), []>,
+                  VMV0:$carry, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1534,7 +1542,7 @@ class VPseudoTernaryNoMask<VReg RetClass,
                            string Constraint> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
-                  AVL:$vl, ixlenimm:$sew), []>,
+                  AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1551,7 +1559,7 @@ class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
                                      int TargetConstraintType = 1> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
-                  AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1570,7 +1578,7 @@ class VPseudoTernaryNoMaskWithPolicyRoundingMode<VReg RetClass,
                                                  int TargetConstraintType = 1> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
-                  ixlenimm:$rm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  ixlenimm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -1589,7 +1597,7 @@ class VPseudoUSSegLoadNoMask<VReg RetClass,
                              bits<4> NF> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl,
-                  ixlenimm:$sew, ixlenimm:$policy), []>,
+                  sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
   let mayLoad = 1;
@@ -1606,7 +1614,7 @@ class VPseudoUSSegLoadMask<VReg RetClass,
                            bits<4> NF> :
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru, GPRMem:$rs1,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
   let mayLoad = 1;
@@ -1624,7 +1632,7 @@ class VPseudoUSSegLoadFFNoMask<VReg RetClass,
                                bits<4> NF> :
       Pseudo<(outs RetClass:$rd, GPR:$vl),
              (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl,
-                  ixlenimm:$sew, ixlenimm:$policy), []>,
+                  sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
   let mayLoad = 1;
@@ -1641,7 +1649,7 @@ class VPseudoUSSegLoadFFMask<VReg RetClass,
                              bits<4> NF> :
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd, GPR:$vl),
              (ins GetVRegNoV0<RetClass>.R:$passthru, GPRMem:$rs1,
-                  VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                  VMaskOp:$vm, AVL:$avl, sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
   let mayLoad = 1;
@@ -1659,7 +1667,7 @@ class VPseudoSSegLoadNoMask<VReg RetClass,
                             bits<4> NF> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$passthru, GPRMem:$rs1, GPR:$offset, AVL:$vl,
-             ixlenimm:$sew, ixlenimm:$policy), []>,
+                 sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLSEG<NF, /*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
   let mayLoad = 1;
@@ -1676,8 +1684,8 @@ class VPseudoSSegLoadMask<VReg RetClass,
                           bits<4> NF> :
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru, GPRMem:$rs1,
-                  GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
-                  ixlenimm:$policy), []>,
+                  GPR:$offset, VMaskOp:$vm, AVL:$vl, sew:$sew,
+                  vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLSEG<NF, /*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
   let mayLoad = 1;
@@ -1698,7 +1706,7 @@ class VPseudoISegLoadNoMask<VReg RetClass,
                             bit Ordered> :
       Pseudo<(outs RetClass:$rd),
              (ins RetClass:$passthru, GPRMem:$rs1, IdxClass:$offset, AVL:$vl,
-                  ixlenimm:$sew, ixlenimm:$policy), []>,
+                  sew:$sew, vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
   let mayLoad = 1;
@@ -1720,8 +1728,8 @@ class VPseudoISegLoadMask<VReg RetClass,
                           bit Ordered> :
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
              (ins GetVRegNoV0<RetClass>.R:$passthru, GPRMem:$rs1,
-                  IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
-                  ixlenimm:$policy), []>,
+                  IdxClass:$offset, VMaskOp:$vm, AVL:$vl, sew:$sew,
+                  vec_policy:$policy), []>,
       RISCVVPseudo,
       RISCVVLXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
   let mayLoad = 1;
@@ -1740,7 +1748,7 @@ class VPseudoUSSegStoreNoMask<VReg ValClass,
                               int EEW,
                               bits<4> NF> :
       Pseudo<(outs),
-             (ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+             (ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo,
       RISCVVSSEG<NF, /*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> {
   let mayLoad = 0;
@@ -1755,7 +1763,7 @@ class VPseudoUSSegStoreMask<VReg ValClass,
                             bits<4> NF> :
       Pseudo<(outs),
              (ins ValClass:$rd, GPRMem:$rs1,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo,
       RISCVVSSEG<NF, /*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> {
   let mayLoad = 0;
@@ -1770,7 +1778,7 @@ class VPseudoSSegStoreNoMask<VReg ValClass,
                              bits<4> NF> :
       Pseudo<(outs),
              (ins ValClass:$rd, GPRMem:$rs1, GPR:$offset,
-                  AVL:$vl, ixlenimm:$sew), []>,
+                  AVL:$vl, sew:$sew), []>,
       RISCVVPseudo,
       RISCVVSSEG<NF, /*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> {
   let mayLoad = 0;
@@ -1785,7 +1793,7 @@ class VPseudoSSegStoreMask<VReg ValClass,
                            bits<4> NF> :
       Pseudo<(outs),
              (ins ValClass:$rd, GPRMem:$rs1, GPR: $offset,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo,
       RISCVVSSEG<NF, /*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> {
   let mayLoad = 0;
@@ -1803,7 +1811,7 @@ class VPseudoISegStoreNoMask<VReg ValClass,
                              bit Ordered> :
       Pseudo<(outs),
              (ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index,
-                  AVL:$vl, ixlenimm:$sew), []>,
+                  AVL:$vl, sew:$sew), []>,
       RISCVVPseudo,
       RISCVVSXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
   let mayLoad = 0;
@@ -1821,7 +1829,7 @@ class VPseudoISegStoreMask<VReg ValClass,
                            bit Ordered> :
       Pseudo<(outs),
              (ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index,
-                  VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+                  VMaskOp:$vm, AVL:$vl, sew:$sew), []>,
       RISCVVPseudo,
       RISCVVSXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
   let mayLoad = 0;
@@ -3072,13 +3080,13 @@ multiclass VPseudoVCALUM_VM_XM_IM {
     defvar mx = m.MX;
     defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=constraint,
                                 Commutable=1, TargetConstraintType=2>,
-              SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1,
+              SchedBinary<"WriteVICALUMV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1,
                           forcePassthruRead=true>;
     defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=constraint, TargetConstraintType=2>,
-              SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1,
+              SchedBinary<"WriteVICALUMX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1,
                           forcePassthruRead=true>;
     defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=1, Constraint=constraint, TargetConstraintType=2>,
-              SchedUnary<"WriteVICALUI", "ReadVICALUV", mx, forceMasked=1,
+              SchedUnary<"WriteVICALUMI", "ReadVICALUV", mx, forceMasked=1,
                           forcePassthruRead=true>;
   }
 }
@@ -3089,11 +3097,11 @@ multiclass VPseudoVCALUM_VM_XM {
     defvar mx = m.MX;
     defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=constraint,
                                 TargetConstraintType=2>,
-              SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1,
+              SchedBinary<"WriteVICALUMV", "ReadVICALUV", "ReadVICALUV", mx, forceMasked=1,
                           forcePassthruRead=true>;
     defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=constraint,
                                 TargetConstraintType=2>,
-              SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1,
+              SchedBinary<"WriteVICALUMX", "ReadVICALUV", "ReadVICALUX", mx, forceMasked=1,
                           forcePassthruRead=true>;
   }
 }
@@ -3104,13 +3112,13 @@ multiclass VPseudoVCALUM_V_X_I {
     defvar mx = m.MX;
     defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=constraint,
                                 Commutable=1, TargetConstraintType=2>,
-              SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
+              SchedBinary<"WriteVICALUMV", "ReadVICALUV", "ReadVICALUV", mx,
                           forcePassthruRead=true>;
     defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=constraint, TargetConstraintType=2>,
-              SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx,
+              SchedBinary<"WriteVICALUMX", "ReadVICALUV", "ReadVICALUX", mx,
                           forcePassthruRead=true>;
     defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=0, Constraint=constraint>,
-              SchedUnary<"WriteVICALUI", "ReadVICALUV", mx,
+              SchedUnary<"WriteVICALUMI", "ReadVICALUV", mx,
                           forcePassthruRead=true>;
   }
 }
@@ -3120,10 +3128,10 @@ multiclass VPseudoVCALUM_V_X {
   foreach m = MxList in {
     defvar mx = m.MX;
     defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=constraint, TargetConstraintType=2>,
-              SchedBinary<"WriteVICALUV", "ReadVICALUV", "ReadVICALUV", mx,
+              SchedBinary<"WriteVICALUMV", "ReadVICALUV", "ReadVICALUV", mx,
                           forcePassthruRead=true>;
     defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=constraint, TargetConstraintType=2>,
-              SchedBinary<"WriteVICALUX", "ReadVICALUV", "ReadVICALUX", mx,
+              SchedBinary<"WriteVICALUMX", "ReadVICALUV", "ReadVICALUX", mx,
                           forcePassthruRead=true>;
   }
 }
@@ -6762,13 +6770,13 @@ let Predicates = [HasVInstructions] in {
 let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
   let HasSEWOp = 1, BaseInstr = VMV_X_S in
   def PseudoVMV_X_S:
-    Pseudo<(outs GPR:$rd), (ins VR:$rs2, ixlenimm:$sew), []>,
+    Pseudo<(outs GPR:$rd), (ins VR:$rs2, sew:$sew), []>,
     Sched<[WriteVMovXS, ReadVMovXS]>,
     RISCVVPseudo;
   let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X, isReMaterializable = 1,
       Constraints = "$rd = $rs1" in
   def PseudoVMV_S_X: Pseudo<(outs VR:$rd),
-                            (ins VR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),
+                            (ins VR:$rs1, GPR:$rs2, AVL:$vl, sew:$sew),
                             []>,
     Sched<[WriteVMovSX, ReadVMovSX_V, ReadVMovSX_X]>,
     RISCVVPseudo;
@@ -6785,14 +6793,14 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
     let HasSEWOp = 1, BaseInstr = VFMV_F_S in
     def "PseudoVFMV_" # f.FX # "_S" :
       Pseudo<(outs f.fprclass:$rd),
-             (ins VR:$rs2, ixlenimm:$sew), []>,
+             (ins VR:$rs2, sew:$sew), []>,
       Sched<[WriteVMovFS, ReadVMovFS]>,
       RISCVVPseudo;
     let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, isReMaterializable = 1,
         Constraints = "$rd = $rs1" in
     def "PseudoVFMV_S_" # f.FX :
       Pseudo<(outs VR:$rd),
-             (ins VR:$rs1, f.fprclass:$rs2, AVL:$vl, ixlenimm:$sew),
+             (ins VR:$rs1, f.fprclass:$rs2, AVL:$vl, sew:$sew),
              []>,
       Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>,
       RISCVVPseudo;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index b54baa16d9286ba..4478e2461110806 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -818,7 +818,7 @@ let Predicates = [HasVendorXCVbi, IsRV32], AddedComplexity = 2 in {
 
   let usesCustomInserter = 1 in
   def Select_GPR_Using_CC_Imm : Pseudo<(outs GPR:$dst),
-                             (ins GPR:$lhs, simm5:$imm5, ixlenimm:$cc,
+                             (ins GPR:$lhs, simm5:$imm5, cond_code:$cc,
                               GPR:$truev, GPR:$falsev), []>;
 
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 5068d0be0fb49bd..81467ada004487e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -230,7 +230,7 @@ let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf
 class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class> :
       Pseudo<(outs),
              (ins OpClass:$op1, payload5:$rs2, payload5:$rd, RS1Class:$r1,
-                  AVL:$vl, ixlenimm:$sew), []>,
+                  AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -243,7 +243,7 @@ class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class> :
 class VPseudoVC_XV<Operand OpClass, VReg RS2Class, DAGOperand RS1Class> :
       Pseudo<(outs),
              (ins OpClass:$op1, payload5:$rd, RS2Class:$rs2, RS1Class:$r1,
-                  AVL:$vl, ixlenimm:$sew), []>,
+                  AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -257,7 +257,7 @@ class VPseudoVC_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
                     DAGOperand RS1Class> :
       Pseudo<(outs),
              (ins OpClass:$op1, RDClass:$rd, RS2Class:$rs2, RS1Class:$r1,
-                  AVL:$vl, ixlenimm:$sew), []>,
+                  AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -270,7 +270,7 @@ class VPseudoVC_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
 class VPseudoVC_V_X<Operand OpClass, VReg RDClass, DAGOperand RS1Class> :
       Pseudo<(outs RDClass:$rd),
              (ins OpClass:$op1, payload5:$rs2, RS1Class:$r1,
-                  AVL:$vl, ixlenimm:$sew), []>,
+                  AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -284,7 +284,7 @@ class VPseudoVC_V_XV<Operand OpClass, VReg RDClass, VReg RS2Class,
                      DAGOperand RS1Class> :
       Pseudo<(outs RDClass:$rd),
              (ins OpClass:$op1, RS2Class:$rs2, RS1Class:$r1,
-                  AVL:$vl, ixlenimm:$sew), []>,
+                  AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -298,7 +298,7 @@ class VPseudoVC_V_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
                       DAGOperand RS1Class> :
       Pseudo<(outs RDClass:$rd),
              (ins OpClass:$op1, RDClass:$rs3, RS2Class:$rs2, RS1Class:$r1,
-                  AVL:$vl, ixlenimm:$sew), []>,
+                  AVL:$vl, sew:$sew), []>,
       RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
index f62a7e1221122b8..f13b3e69f84f899 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -35,6 +35,8 @@ def LoadFPImmOperand : AsmOperandClass {
 def loadfpimm : Operand<XLenVT> {
   let ParserMatchClass = LoadFPImmOperand;
   let PrintMethod = "printFPImmOperand";
+  let OperandType = "OPERAND_UIMM5";
+  let OperandNamespace = "RISCVOp";
 }
 
 def RTZArg : AsmOperandClass {
@@ -48,6 +50,8 @@ def rtzarg : Operand<XLenVT> {
   let ParserMatchClass = RTZArg;
   let PrintMethod = "printFRMArg";
   let DecoderMethod = "decodeRTZArg";
+  let OperandType = "OPERAND_RTZARG";
+  let OperandNamespace = "RISCVOp";
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
index 7ec13e4eaafa7d3..782651fd6d01975 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -231,7 +231,7 @@ class ZvkMxSet<string vd_lmul> {
 
 class VPseudoBinaryNoMask_Zvk<DAGOperand RetClass, VReg OpClass> :
       Pseudo<(outs RetClass:$rd_wb),
-        (ins RetClass:$rd, OpClass:$rs2, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+        (ins RetClass:$rd, OpClass:$rs2, AVL:$vl, sew:$sew, vec_policy:$policy), []>,
         RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
@@ -248,7 +248,7 @@ class VPseudoTernaryNoMask_Zvk<VReg RetClass,
                                DAGOperand Op2Class> :
         Pseudo<(outs RetClass:$rd_wb),
                (ins RetClass:$rd, Op1Class:$rs2, Op2Class:$rs1,
-                    AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+                    AVL:$vl, sew:$sew, vec_policy:$policy), []>,
         RISCVVPseudo {
   let mayLoad = 0;
   let mayStore = 0;
diff --git a/llvm/lib/Target/RISCV/RISCVProfiles.td b/llvm/lib/Target/RISCV/RISCVProfiles.td
index 157e087a64da07b..cbf2a2eddf38ed8 100644
--- a/llvm/lib/Target/RISCV/RISCVProfiles.td
+++ b/llvm/lib/Target/RISCV/RISCVProfiles.td
@@ -73,7 +73,8 @@ defvar RVA23U64Features = !listconcat(RVA22U64Features,
                                        FeatureStdExtZcmop,
                                        FeatureStdExtZcb,
                                        FeatureStdExtZfa,
-                                       FeatureStdExtZawrs]);
+                                       FeatureStdExtZawrs,
+                                       FeatureStdExtSupm]);
 
 defvar RVA23S64BaseFeatures = !listconcat(RVA22S64BaseFeatures,
                                           [FeatureStdExtSvnapot,
@@ -81,14 +82,7 @@ defvar RVA23S64BaseFeatures = !listconcat(RVA22S64BaseFeatures,
                                            FeatureStdExtSscofpmf,
                                            FeatureStdExtSsnpm,
                                            FeatureStdExtSsu64xl,
-                                           FeatureStdExtH,
-                                           FeatureStdExtSsstateen,
-                                           FeatureStdExtShcounterenw,
-                                           FeatureStdExtShvstvala,
-                                           FeatureStdExtShtvala,
-                                           FeatureStdExtShvstvecd,
-                                           FeatureStdExtShvsatpa,
-                                           FeatureStdExtShgatpa]);
+                                           FeatureStdExtSha]);
 defvar RVA23S64Features = !listconcat(RVA23U64Features,
                                       RVA23S64BaseFeatures);
 
@@ -167,8 +161,8 @@ def RVA20U64 : RISCVProfile<"rva20u64", RVA20U64Features>;
 def RVA20S64 : RISCVProfile<"rva20s64", RVA20S64Features>;
 def RVA22U64 : RISCVProfile<"rva22u64", RVA22U64Features>;
 def RVA22S64 : RISCVProfile<"rva22s64", RVA22S64Features>;
-def RVA23U64 : RISCVExperimentalProfile<"rva23u64", RVA23U64Features>;
-def RVA23S64 : RISCVExperimentalProfile<"rva23s64", RVA23S64Features>;
-def RVB23U64 : RISCVExperimentalProfile<"rvb23u64", RVB23U64Features>;
-def RVB23S64 : RISCVExperimentalProfile<"rvb23s64", RVB23S64Features>;
+def RVA23U64 : RISCVProfile<"rva23u64", RVA23U64Features>;
+def RVA23S64 : RISCVProfile<"rva23s64", RVA23S64Features>;
+def RVB23U64 : RISCVProfile<"rvb23u64", RVB23U64Features>;
+def RVB23S64 : RISCVProfile<"rvb23s64", RVB23S64Features>;
 def RVM23U32 : RISCVExperimentalProfile<"rvm23u32", RVM23U32Features>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 24cbe1531c017cb..d07ee393bbcfd09 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -631,6 +631,9 @@ foreach mx = SchedMxList in {
     defm "" : LMULWriteResMX<"WriteVICALUV",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVICALUX",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVICALUI",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUMV",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUMX",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUMI",   [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVShiftV",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVShiftX",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVShiftI",    [SiFive7VCQ, SiFive7VA], mx, IsWorstCase>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
index 6926184e92399c2..7a54d2fe1080806 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP400.td
@@ -467,6 +467,9 @@ foreach mx = SchedMxList in {
     defm "" : LMULWriteResMX<"WriteVICALUV",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVICALUX",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVICALUI",   [SiFiveP400VEXQ0], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUMV",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUMX",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUMI",  [SiFiveP400VEXQ0], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVICmpV",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVICmpX",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVICmpI",    [SiFiveP400VEXQ0], mx, IsWorstCase>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
index 59972d781a315ae..c685a6d2b094bed 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFiveP600.td
@@ -403,6 +403,9 @@ foreach mx = SchedMxList in {
     defm "" : LMULWriteResMX<"WriteVICALUV",  [SiFiveP600VectorArith], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVICALUX",  [SiFiveP600VectorArith], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVICALUI",  [SiFiveP600VectorArith], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUMV", [SiFiveP600VectorArith], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUMX", [SiFiveP600VectorArith], mx, IsWorstCase>;
+    defm "" : LMULWriteResMX<"WriteVICALUMI", [SiFiveP600VectorArith], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVICmpV",   [SiFiveP600VectorArith], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVICmpX",   [SiFiveP600VectorArith], mx, IsWorstCase>;
     defm "" : LMULWriteResMX<"WriteVICmpI",   [SiFiveP600VectorArith], mx, IsWorstCase>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index ee041ea142b94c0..6b9f1dd3218913f 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -364,6 +364,9 @@ defm "" : LMULSchedWrites<"WriteVExtV">;
 defm "" : LMULSchedWrites<"WriteVICALUV">;
 defm "" : LMULSchedWrites<"WriteVICALUX">;
 defm "" : LMULSchedWrites<"WriteVICALUI">;
+defm "" : LMULSchedWrites<"WriteVICALUMV">;
+defm "" : LMULSchedWrites<"WriteVICALUMX">;
+defm "" : LMULSchedWrites<"WriteVICALUMI">;
 // 11.6. Vector Single-Width Bit Shift Instructions
 defm "" : LMULSchedWrites<"WriteVShiftV">;
 defm "" : LMULSchedWrites<"WriteVShiftX">;
@@ -856,6 +859,9 @@ defm "" : LMULWriteRes<"WriteVExtV", []>;
 defm "" : LMULWriteRes<"WriteVICALUV", []>;
 defm "" : LMULWriteRes<"WriteVICALUX", []>;
 defm "" : LMULWriteRes<"WriteVICALUI", []>;
+defm "" : LMULWriteRes<"WriteVICALUMV", []>;
+defm "" : LMULWriteRes<"WriteVICALUMX", []>;
+defm "" : LMULWriteRes<"WriteVICALUMI", []>;
 defm "" : LMULWriteRes<"WriteVShiftV", []>;
 defm "" : LMULWriteRes<"WriteVShiftX", []>;
 defm "" : LMULWriteRes<"WriteVShiftI", []>;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 089dc6c529193de..72d74d2d79b1d5a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -99,11 +99,6 @@ static cl::opt<bool> EnableMISchedLoadStoreClustering(
     cl::desc("Enable load and store clustering in the machine scheduler"),
     cl::init(true));
 
-static cl::opt<bool> EnableVSETVLIAfterRVVRegAlloc(
-    "riscv-vsetvl-after-rvv-regalloc", cl::Hidden,
-    cl::desc("Insert vsetvls after vector register allocation"),
-    cl::init(true));
-
 static cl::opt<bool>
     EnableVLOptimizer("riscv-enable-vl-optimizer",
                       cl::desc("Enable the RISC-V VL Optimizer pass"),
@@ -413,8 +408,7 @@ FunctionPass *RISCVPassConfig::createRVVRegAllocPass(bool Optimized) {
 
 bool RISCVPassConfig::addRegAssignAndRewriteFast() {
   addPass(createRVVRegAllocPass(false));
-  if (EnableVSETVLIAfterRVVRegAlloc)
-    addPass(createRISCVInsertVSETVLIPass());
+  addPass(createRISCVInsertVSETVLIPass());
   if (TM->getOptLevel() != CodeGenOptLevel::None &&
       EnableRISCVDeadRegisterElimination)
     addPass(createRISCVDeadRegisterDefinitionsPass());
@@ -424,8 +418,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteFast() {
 bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {
   addPass(createRVVRegAllocPass(true));
   addPass(createVirtRegRewriter(false));
-  if (EnableVSETVLIAfterRVVRegAlloc)
-    addPass(createRISCVInsertVSETVLIPass());
+  addPass(createRISCVInsertVSETVLIPass());
   if (TM->getOptLevel() != CodeGenOptLevel::None &&
       EnableRISCVDeadRegisterElimination)
     addPass(createRISCVDeadRegisterDefinitionsPass());
@@ -575,15 +568,6 @@ void RISCVPassConfig::addPreRegAlloc() {
   addPass(createRISCVInsertReadWriteCSRPass());
   addPass(createRISCVInsertWriteVXRMPass());
   addPass(createRISCVLandingPadSetupPass());
-
-  // Run RISCVInsertVSETVLI after PHI elimination. On O1 and above do it after
-  // register coalescing so needVSETVLIPHI doesn't need to look through COPYs.
-  if (!EnableVSETVLIAfterRVVRegAlloc) {
-    if (TM->getOptLevel() == CodeGenOptLevel::None)
-      insertPass(&PHIEliminationID, &RISCVInsertVSETVLIID);
-    else
-      insertPass(&RegisterCoalescerID, &RISCVInsertVSETVLIID);
-  }
 }
 
 void RISCVPassConfig::addFastRegAlloc() {
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index d9377fe4b91a1ad..11ed7d660be09e7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -2547,6 +2547,17 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
     return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt);
   case Intrinsic::spv_sign:
     return selectSign(ResVReg, ResType, I);
+  case Intrinsic::spv_group_memory_barrier_with_group_sync: {
+    Register MemSemReg =
+        buildI32Constant(SPIRV::MemorySemantics::SequentiallyConsistent, I);
+    Register ScopeReg = buildI32Constant(SPIRV::Scope::Workgroup, I);
+    MachineBasicBlock &BB = *I.getParent();
+    return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpControlBarrier))
+        .addUse(ScopeReg)
+        .addUse(ScopeReg)
+        .addUse(MemSemReg)
+        .constrainAllUses(TII, TRI, RBI);
+  } break;
   case Intrinsic::spv_lifetime_start:
   case Intrinsic::spv_lifetime_end: {
     unsigned Op = IID == Intrinsic::spv_lifetime_start ? SPIRV::OpLifetimeStart
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
index 5b407a8b6f54a0f..4bba54463103bcb 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
@@ -38,7 +38,7 @@ bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const {
   return (STI.hasFeature(Sparc::FeatureV9)) != 0;
 }
 
-void SparcInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void SparcInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   OS << '%' << getRegisterName(Reg);
 }
 
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
index 207a970228058da..52321d562118583 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h
@@ -24,7 +24,7 @@ class SparcInstPrinter : public MCInstPrinter {
                    const MCRegisterInfo &MRI)
       : MCInstPrinter(MAI, MII, MRI) {}
 
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
   void printRegName(raw_ostream &OS, MCRegister Reg, unsigned AltIdx) const;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp
index 05113010794e0b6..72b7bd60276a7a8 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp
@@ -19,7 +19,7 @@ using namespace llvm;
 
 void SystemZGNUInstPrinter::printFormattedRegName(const MCAsmInfo *MAI,
                                                   MCRegister Reg,
-                                                  raw_ostream &O) const {
+                                                  raw_ostream &O) {
   const char *RegName = getRegisterName(Reg);
   markup(O, Markup::Register) << '%' << RegName;
 }
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h
index 8f62ae0e16c006d..7095e325c70bc00 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h
@@ -38,7 +38,7 @@ class SystemZGNUInstPrinter : public SystemZInstPrinterCommon {
 
 private:
   void printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg,
-                             raw_ostream &O) const override;
+                             raw_ostream &O) override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp
index 9abd408324c0672..ef9881932f7c085 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp
@@ -19,7 +19,7 @@ using namespace llvm;
 
 void SystemZHLASMInstPrinter::printFormattedRegName(const MCAsmInfo *MAI,
                                                     MCRegister Reg,
-                                                    raw_ostream &O) const {
+                                                    raw_ostream &O) {
   const char *RegName = getRegisterName(Reg);
   // Skip register prefix so that only register number is left
   assert(isalpha(RegName[0]) && isdigit(RegName[1]));
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h
index 9a69e012c72942d..ffccbec36c7491a 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h
@@ -37,7 +37,7 @@ class SystemZHLASMInstPrinter : public SystemZInstPrinterCommon {
 
 private:
   void printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg,
-                             raw_ostream &O) const override;
+                             raw_ostream &O) override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp
index 00560ab1f4b18d9..fe0f3874765614d 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp
@@ -57,8 +57,7 @@ void SystemZInstPrinterCommon::printOperand(const MCOperand &MO,
     llvm_unreachable("Invalid operand");
 }
 
-void SystemZInstPrinterCommon::printRegName(raw_ostream &O,
-                                            MCRegister Reg) const {
+void SystemZInstPrinterCommon::printRegName(raw_ostream &O, MCRegister Reg) {
   printFormattedRegName(&MAI, Reg, O);
 }
 
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h
index 9a972824f7ffb51..1a11e421691ae38 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h
@@ -36,10 +36,10 @@ class SystemZInstPrinterCommon : public MCInstPrinter {
   void printOperand(const MCOperand &MO, const MCAsmInfo *MAI, raw_ostream &O);
 
   virtual void printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg,
-                                     raw_ostream &O) const {}
+                                     raw_ostream &O) {}
 
   // Override MCInstPrinter.
-  void printRegName(raw_ostream &O, MCRegister Reg) const override;
+  void printRegName(raw_ostream &O, MCRegister Reg) override;
 
 protected:
   template <unsigned N>
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 8fbd05eab5f6ee2..f2fa7e7c9f9fee6 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -1450,11 +1450,6 @@ void SystemZXPLINKFrameLowering::inlineStackProbe(
 }
 
 bool SystemZXPLINKFrameLowering::hasFPImpl(const MachineFunction &MF) const {
-  // Naked functions have no stack frame pushed, so we don't have a frame
-  // pointer.
-  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
-    return false;
-
   return (MF.getFrameInfo().hasVarSizedObjects());
 }
 
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 3e05f3b0180a782..1fa2dbfb26fc25a 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1400,9 +1400,11 @@ SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                          const MachineFunction &MF) const {
   Register Reg =
       StringSwitch<Register>(RegName)
-          .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
-          .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
-          .Default(0);
+          .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
+                                                   : SystemZ::NoRegister)
+          .Case("r15",
+                Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
+          .Default(SystemZ::NoRegister);
 
   if (Reg)
     return Reg;
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
index 8261b5aa7b4e13a..47455a9a0274c2e 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
 #define PRINT_ALIAS_INSTR
 #include "VEGenAsmWriter.inc"
 
-void VEInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void VEInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   // Generic registers have identical register name among register classes.
   unsigned AltIdx = VE::AsmName;
   // Misc registers have each own name, so no use alt-names.
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h
index 65660a49c5e4dd3..d5e0ebd3596ca86 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h
@@ -24,7 +24,7 @@ class VEInstPrinter : public MCInstPrinter {
                 const MCRegisterInfo &MRI)
       : MCInstPrinter(MAI, MII, MRI) {}
 
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &OS) override;
 
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index 4c29b59b3302e47..026f859b15d7152 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -38,8 +38,7 @@ WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
                                                const MCRegisterInfo &MRI)
     : MCInstPrinter(MAI, MII, MRI) {}
 
-void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
-                                          MCRegister Reg) const {
+void WebAssemblyInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   assert(Reg.id() != WebAssembly::UnusedReg);
   // Note that there's an implicit local.get/local.set here!
   OS << "$" << Reg.id();
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
index b499926ab82965b..e7c5e14973b630a 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
@@ -35,7 +35,7 @@ class WebAssemblyInstPrinter final : public MCInstPrinter {
   WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
                          const MCRegisterInfo &MRI);
 
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &OS) override;
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td
index 37d99690c25b1fa..88628f2a7935453 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -114,7 +114,8 @@ def : ProcessorModel<"mvp", NoSchedModel, []>;
 // consideration given to available support in relevant engines and tools, and
 // the importance of the features.
 def : ProcessorModel<"generic", NoSchedModel,
-                      [FeatureMultivalue, FeatureMutableGlobals,
+                      [FeatureBulkMemory, FeatureMultivalue,
+                       FeatureMutableGlobals, FeatureNontrappingFPToInt,
                        FeatureReferenceTypes, FeatureSignExt]>;
 
 // Latest and greatest experimental version of WebAssembly. Bugs included!
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index a3cc9bae470859b..7c3e8d18ad276bb 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -111,6 +111,7 @@ static Function *createWrapper(Function *F, FunctionType *Ty) {
 
   Function *Wrapper = Function::Create(Ty, Function::PrivateLinkage,
                                        F->getName() + "_bitcast", M);
+  Wrapper->setAttributes(F->getAttributes());
   BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
   const DataLayout &DL = BB->getDataLayout();
 
@@ -201,6 +202,7 @@ static Function *createWrapper(Function *F, FunctionType *Ty) {
     Wrapper->eraseFromParent();
     Wrapper = Function::Create(Ty, Function::PrivateLinkage,
                                F->getName() + "_bitcast_invalid", M);
+    Wrapper->setAttributes(F->getAttributes());
     BasicBlock *BB = BasicBlock::Create(M->getContext(), "body", Wrapper);
     new UnreachableInst(M->getContext(), BB);
     Wrapper->setName(F->getName() + "_bitcast_invalid");
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 3fe6ccf1c608e1e..83cd57d0bbdd557 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -233,13 +233,30 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass {
 
 private:
   FeatureBitset coalesceFeatures(const Module &M) {
-    FeatureBitset Features =
-        WasmTM
-            ->getSubtargetImpl(std::string(WasmTM->getTargetCPU()),
-                               std::string(WasmTM->getTargetFeatureString()))
-            ->getFeatureBits();
-    for (auto &F : M)
+    // Union the features of all defined functions. Start with an empty set, so
+    // that if a feature is disabled in every function, we'll compute it as
+    // disabled. If any function lacks a target-features attribute, it'll
+    // default to the target CPU from the `TargetMachine`.
+    FeatureBitset Features;
+    bool AnyDefinedFuncs = false; // set once a definition is visited
+    for (auto &F : M) {
+      if (F.isDeclaration()) // declarations contribute no features
+        continue;
+
       Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits();
+      AnyDefinedFuncs = true;
+    }
+
+    // If we have no defined functions, use the target CPU from the
+    // `TargetMachine`.
+    if (!AnyDefinedFuncs) {
+      Features =
+          WasmTM
+              ->getSubtargetImpl(std::string(WasmTM->getTargetCPU()),
+                                 std::string(WasmTM->getTargetFeatureString()))
+              ->getFeatureBits();
+    }
+
     return Features;
   }
 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index 58b4527af6557b2..c811d621e60eb7b 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -35,7 +35,7 @@ using namespace llvm;
 #define PRINT_ALIAS_INSTR
 #include "X86GenAsmWriter.inc"
 
-void X86ATTInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void X86ATTInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   markup(OS, Markup::Register) << '%' << getRegisterName(Reg);
 }
 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
index 83040c112b68850..7e525e232362299 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h
@@ -23,7 +23,7 @@ class X86ATTInstPrinter final : public X86InstPrinterCommon {
                     const MCRegisterInfo &MRI)
       : X86InstPrinterCommon(MAI, MII, MRI), HasCustomInstComment(false) {}
 
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &OS) override;
   bool printVecCompareInstr(const MCInst *MI, raw_ostream &OS);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
index 587f923e789f02f..49e8bab4c0363da 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp
@@ -40,6 +40,20 @@ using namespace llvm;
   CASE_MASK_INS_COMMON(Inst, Suffix, src)         \
   CASE_MASKZ_INS_COMMON(Inst, Suffix, src)
 
+#define CASE_FPCLASS_PACKED(Inst, src)    \
+  CASE_AVX_INS_COMMON(Inst, Z, r##src)    \
+  CASE_AVX_INS_COMMON(Inst, Z256, r##src) \
+  CASE_AVX_INS_COMMON(Inst, Z128, r##src) \
+  CASE_MASK_INS_COMMON(Inst, Z, r##src)
+
+#define CASE_FPCLASS_PACKED_MEM(Inst) \
+  CASE_FPCLASS_PACKED(Inst, m)        \
+  CASE_FPCLASS_PACKED(Inst, mb)
+
+#define CASE_FPCLASS_SCALAR(Inst, src)  \
+  CASE_AVX_INS_COMMON(Inst, Z, r##src)  \
+  CASE_MASK_INS_COMMON(Inst, Z, r##src)
+
 #define CASE_PTERNLOG(Inst, src)                                               \
   CASE_AVX512_INS_COMMON(Inst, Z, r##src##i)                                   \
   CASE_AVX512_INS_COMMON(Inst, Z256, r##src##i)                                \
@@ -949,6 +963,70 @@ static bool printPTERNLOGComments(const MCInst *MI, raw_ostream &OS,
   return true;
 }
 
+static bool printFPCLASSComments(const MCInst *MI, raw_ostream &OS,
+                                 const MCInstrInfo &MCII) {
+  unsigned NumOperands = MI->getNumOperands();
+  int SrcIdx; // source register operand index, or -1 for a memory source
+  switch (MI->getOpcode()) {
+    CASE_FPCLASS_PACKED(FPCLASSPBF16, r)
+    CASE_FPCLASS_PACKED(FPCLASSPH, r)
+    CASE_FPCLASS_PACKED(FPCLASSPS, r)
+    CASE_FPCLASS_PACKED(FPCLASSPD, r)
+    CASE_FPCLASS_SCALAR(FPCLASSSH, r)
+    CASE_FPCLASS_SCALAR(FPCLASSSS, r)
+    CASE_FPCLASS_SCALAR(FPCLASSSD, r) {
+      SrcIdx = NumOperands - 2; // operand just before the trailing immediate
+      break;
+    }
+    CASE_FPCLASS_PACKED_MEM(FPCLASSPBF16)
+    CASE_FPCLASS_PACKED_MEM(FPCLASSPH)
+    CASE_FPCLASS_PACKED_MEM(FPCLASSPS)
+    CASE_FPCLASS_PACKED_MEM(FPCLASSPD)
+    CASE_FPCLASS_SCALAR(FPCLASSSH, m)
+    CASE_FPCLASS_SCALAR(FPCLASSSS, m)
+    CASE_FPCLASS_SCALAR(FPCLASSSD, m) {
+      SrcIdx = -1; // memory source: printed as "mem" below
+      break;
+    }
+  default:
+    return false; // not an FPCLASS opcode handled here; nothing is printed
+  }
+  StringRef DestName = getRegName(MI->getOperand(0).getReg());
+  StringRef SrcName =
+      SrcIdx != -1 ? getRegName(MI->getOperand(SrcIdx).getReg()) : "mem";
+
+  OS << DestName;
+  printMasking(OS, MI, MCII);
+  OS << " = ";
+
+  uint8_t Categories = MI->getOperand(NumOperands - 1).getImm();
+  if (Categories == 0) { // no class bits selected in the immediate
+    OS << "false";
+  } else {
+    static constexpr StringLiteral CategoryNames[] = { // index = bit position
+      "QuietNaN",
+      "PositiveZero",
+      "NegativeZero",
+      "PositiveInfinity",
+      "NegativeInfinity",
+      "Subnormal",
+      "Negative",
+      "SignalingNaN",
+    };
+    bool Conjoin = false; // true once a term was printed; emit " | " first
+    for (size_t I = 0, E = std::size(CategoryNames); I != E; ++I) {
+      if (Categories & (1 << I)) { // bit I of the immediate is set
+        if (Conjoin)
+          OS << " | ";
+        Conjoin = true;
+        OS << "is" << CategoryNames[I] << '(' << SrcName << ')';
+      }
+    }
+  }
+  OS << '\n';
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 // Top Level Entrypoint
 //===----------------------------------------------------------------------===//
@@ -970,6 +1048,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
   if (printPTERNLOGComments(MI, OS, MCII))
     return true;
 
+  if (printFPCLASSComments(MI, OS, MCII))
+    return true;
+
   switch (MI->getOpcode()) {
   default:
     // Not an instruction for which we can decode comments.
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index cd8b9aa62573001..8e7dae229275bbb 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -33,7 +33,7 @@ using namespace llvm;
 #define PRINT_ALIAS_INSTR
 #include "X86GenAsmWriter1.inc"
 
-void X86IntelInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void X86IntelInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   markup(OS, Markup::Register) << getRegisterName(Reg);
 }
 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
index a34c06782f40422..988ab9626c3fd7e 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h
@@ -24,7 +24,7 @@ class X86IntelInstPrinter final : public X86InstPrinterCommon {
                       const MCRegisterInfo &MRI)
     : X86InstPrinterCommon(MAI, MII, MRI) {}
 
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &OS) override;
   bool printVecCompareInstr(const MCInst *MI, raw_ostream &OS);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 47eb617c06ac5bb..39b0f7c4c4c1e63 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -23,9 +23,9 @@ enum AsmWriterFlavorTy {
   ATT = 0, Intel = 1
 };
 
-static cl::opt<AsmWriterFlavorTy> AsmWriterFlavor(
+static cl::opt<AsmWriterFlavorTy> X86AsmSyntax(
     "x86-asm-syntax", cl::init(ATT), cl::Hidden,
-    cl::desc("Choose style of code to emit from X86 backend:"),
+    cl::desc("Select the assembly style for input"),
     cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"),
                clEnumValN(Intel, "intel", "Emit Intel-style assembly")));
 
@@ -41,7 +41,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
   if (is64Bit)
     CodePointerSize = CalleeSaveStackSlotSize = 8;
 
-  AssemblerDialect = AsmWriterFlavor;
+  AssemblerDialect = X86AsmSyntax;
 
   if (!is64Bit)
     Data64bitsDirective = nullptr;       // we can't emit a 64-bit unit
@@ -89,7 +89,7 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
   // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
   CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
 
-  AssemblerDialect = AsmWriterFlavor;
+  AssemblerDialect = X86AsmSyntax;
 
   // Debug Information
   SupportsDebugInformation = true;
@@ -126,7 +126,7 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
 
   ExceptionsType = ExceptionHandling::WinEH;
 
-  AssemblerDialect = AsmWriterFlavor;
+  AssemblerDialect = X86AsmSyntax;
 
   AllowAtInName = true;
 }
@@ -159,7 +159,7 @@ X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
     ExceptionsType = ExceptionHandling::DwarfCFI;
   }
 
-  AssemblerDialect = AsmWriterFlavor;
+  AssemblerDialect = X86AsmSyntax;
 
   AllowAtInName = true;
 }
diff --git a/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index a0c91d4e3c3d7eb..fe2c8fff577503e 100644
--- a/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -443,8 +443,7 @@ void FixupBWInstPass::processBasicBlock(MachineFunction &MF,
   // We run after PEI, so we need to AddPristinesAndCSRs.
   LiveUnits.addLiveOuts(MBB);
 
-  OptForSize = MF.getFunction().hasOptSize() ||
-               llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
+  OptForSize = llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
 
   for (MachineInstr &MI : llvm::reverse(MBB)) {
     if (MachineInstr *NewMI = tryReplaceInstr(&MI, MBB))
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a6d77873ec2901b..34bc5d76c15ceaa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2393,6 +2393,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::bf16, Custom);
     for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
       setF16Action(VT, Expand);
+      if (!Subtarget.hasBF16())
+        setOperationAction(ISD::VSELECT, VT, Custom);
       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
       setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
@@ -2406,7 +2408,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
   }
 
-  if (!Subtarget.useSoftFloat() && Subtarget.hasBF16()) {
+  if (!Subtarget.useSoftFloat() && Subtarget.hasBF16() &&
+      Subtarget.useAVX512Regs()) {
     addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
     setF16Action(MVT::v32bf16, Expand);
     for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV})
@@ -2419,27 +2422,27 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   }
 
   if (!Subtarget.useSoftFloat() && Subtarget.hasAVX10_2()) {
-    addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
-    addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
-    addRegisterClass(MVT::v32bf16, &X86::VR512RegClass);
-
-    setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
-    setOperationAction(ISD::FSUB, MVT::v32bf16, Legal);
-    setOperationAction(ISD::FMUL, MVT::v32bf16, Legal);
-    setOperationAction(ISD::FDIV, MVT::v32bf16, Legal);
-    setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
-    setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
-    setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
-    if (Subtarget.hasVLX()) {
-      for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
-        setOperationAction(ISD::FADD, VT, Legal);
-        setOperationAction(ISD::FSUB, VT, Legal);
-        setOperationAction(ISD::FMUL, VT, Legal);
-        setOperationAction(ISD::FDIV, VT, Legal);
-        setOperationAction(ISD::FSQRT, VT, Legal);
-        setOperationAction(ISD::FMA, VT, Legal);
-        setOperationAction(ISD::SETCC, VT, Custom);
-      }
+    for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
+      setOperationAction(ISD::FADD, VT, Legal);
+      setOperationAction(ISD::FSUB, VT, Legal);
+      setOperationAction(ISD::FMUL, VT, Legal);
+      setOperationAction(ISD::FDIV, VT, Legal);
+      setOperationAction(ISD::FSQRT, VT, Legal);
+      setOperationAction(ISD::FMA, VT, Legal);
+      setOperationAction(ISD::SETCC, VT, Custom);
+    }
+    if (Subtarget.hasAVX10_2_512()) {
+      setOperationAction(ISD::FADD, MVT::v32bf16, Legal);
+      setOperationAction(ISD::FSUB, MVT::v32bf16, Legal);
+      setOperationAction(ISD::FMUL, MVT::v32bf16, Legal);
+      setOperationAction(ISD::FDIV, MVT::v32bf16, Legal);
+      setOperationAction(ISD::FSQRT, MVT::v32bf16, Legal);
+      setOperationAction(ISD::FMA, MVT::v32bf16, Legal);
+      setOperationAction(ISD::SETCC, MVT::v32bf16, Custom);
+    }
+    for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) {
+      setCondCodeAction(ISD::SETOEQ, VT, Custom);
+      setCondCodeAction(ISD::SETUNE, VT, Custom);
     }
   }
 
@@ -24073,6 +24076,13 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
     return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
   }
 
+  if (Subtarget.hasAVX10_2()) {
+    if (CC == ISD::SETOEQ || CC == ISD::SETUNE) {
+      auto NewCC = (CC == ISD::SETOEQ) ? X86::COND_E : (X86::COND_NE);
+      return getSETCC(NewCC, DAG.getNode(X86ISD::UCOMX, dl, MVT::i32, Op0, Op1),
+                      dl, DAG);
+    }
+  }
   // Handle floating point.
   X86::CondCode CondCode = TranslateX86CC(CC, dl, /*IsFP*/ true, Op0, Op1, DAG);
   if (CondCode == X86::COND_INVALID)
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 9ef2debb57fa007..0301c07dfb540b7 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1541,6 +1541,24 @@ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_
 //-------------------------------------------------
 // AVX10  COMEF instructions
 //-------------------------------------------------
+multiclass avx10_com_ef<bits<8> Opc, RegisterClass RC, ValueType VT,
+                        SDPatternOperator OpNode, string OpcodeStr,
+                        X86MemOperand x86memop, PatFrag ld_frag,
+                        Domain d, X86FoldableSchedWrite sched = WriteFComX>{
+  let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in {
+    def rr : AVX512<Opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), // reg-reg
+                    !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+                    [(set EFLAGS, (OpNode (VT RC:$src1), RC:$src2))]>,
+                    EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC;
+    let mayLoad = 1 in { // rm form reads $src2 from memory via ld_frag
+      def rm : AVX512<Opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+                      !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+                      [(set EFLAGS, (OpNode (VT RC:$src1), (ld_frag addr:$src2)))]>,
+                      EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+    }
+  }
+}
+
 multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
                              string OpcodeStr,
                              Domain d,
@@ -1564,6 +1582,15 @@ multiclass avx10_com_ef_int<bits<8> Opc, X86VectorVTInfo _, SDNode OpNode,
 }
 
 let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in {
+  defm VUCOMXSDZ  :  avx10_com_ef<0x2e, FR64X, f64, X86ucomi512,
+                                  "vucomxsd", f64mem, loadf64, SSEPackedDouble>,
+                                  TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
+  defm VUCOMXSHZ  :  avx10_com_ef<0x2e, FR16X, f16, X86ucomi512,
+                                  "vucomxsh", f16mem, loadf16, SSEPackedSingle>,
+                                  T_MAP5, XD, EVEX_CD8<16, CD8VT1>;
+  defm VUCOMXSSZ  :  avx10_com_ef<0x2e, FR32X, f32, X86ucomi512,
+                                  "vucomxss", f32mem, loadf32, SSEPackedSingle>,
+                                  TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
   defm VCOMXSDZ   :  avx10_com_ef_int<0x2f, v2f64x_info, X86comi512,
                                       "vcomxsd", SSEPackedDouble>,
                                       TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
@@ -1675,3 +1702,17 @@ defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>,
                           T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>;
 defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>,
                           T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>;
+
+// SM4(EVEX)
+multiclass avx10_sm4_base<string OpStr> {
+  // SM4_Base is defined in X86InstrSSE.td; instantiate its EVEX-encoded forms.
+  let Predicates = [HasSM4, HasAVX10_2], AddedComplexity = 1 in {
+    defm Z128 : SM4_Base<OpStr, VR128X, "128", loadv4i32, i128mem>, EVEX_V128;
+    defm Z256 : SM4_Base<OpStr, VR256X, "256", loadv8i32, i256mem>, EVEX_V256;
+  }
+  let Predicates = [HasSM4, HasAVX10_2_512] in // 512-bit form needs AVX10.2-512
+    defm Z : SM4_Base<OpStr, VR512, "512", loadv16i32, i512mem>, EVEX_V512;
+}
+
+defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
+defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 98c31867e6b22b0..32c4ebc331f1d72 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -10549,6 +10549,9 @@ multiclass compress_by_vec_width_lowering<X86VectorVTInfo _, string Name> {
   def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, undef)),
             (!cast<Instruction>(Name#_.ZSuffix#rrkz)
                             _.KRCWM:$mask, _.RC:$src)>;
+  def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, _.ImmAllZerosV)),
+            (!cast<Instruction>(Name#_.ZSuffix#rrkz)
+                            _.KRCWM:$mask, _.RC:$src)>;
   def : Pat<(_.VT (vector_compress _.RC:$src, _.KRCWM:$mask, _.RC:$passthru)),
               (!cast<Instruction>(Name#_.ZSuffix#rrk)
                             _.RC:$passthru, _.KRCWM:$mask, _.RC:$src)>;
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index e1573b37d4dc265..dc701f1afc915f7 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -466,7 +466,10 @@ multiclass Urdwrmsr<Map rrmap, string suffix> {
                                 "urdmsr\t{$imm, $dst|$dst, $imm}",
                                 [(set GR64:$dst, (int_x86_urdmsr i64immSExt32_su:$imm))]>,
                            T_MAP7, VEX, XD, NoCD8;
-}
+    def RDMSRri#suffix  : Ii32<0xf6, MRM0r, (outs GR64:$dst), (ins i64i32imm:$imm),
+                                "rdmsr\t{$imm, $dst|$dst, $imm}", []>,
+                           T_MAP7, VEX, XD, NoCD8;
+  }
   let mayStore = 1 in {
     let OpMap = rrmap in
     def UWRMSRrr#suffix  : I<0xf8, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
@@ -476,6 +479,9 @@ multiclass Urdwrmsr<Map rrmap, string suffix> {
                                 "uwrmsr\t{$src, $imm|$imm, $src}",
                                 [(int_x86_uwrmsr i64immSExt32_su:$imm, GR64:$src)]>,
                            T_MAP7, VEX, XS, NoCD8;
+    def WRMSRNSir#suffix  : Ii32<0xf6, MRM0r, (outs), (ins GR64:$src, i64i32imm:$imm),
+                                "wrmsrns\t{$src, $imm|$imm, $src}",
+                                []>, T_MAP7, VEX, XS, NoCD8;
   }
 }
 
diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index 3172896a8f6092c..280eaf04f23c5ae 100644
--- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -741,9 +741,7 @@ bool X86OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
 
     // Remove redundant address calculations. Do it only for -Os/-Oz since only
     // a code size gain is expected from this part of the pass.
-    bool OptForSize = MF.getFunction().hasOptSize() ||
-                      llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
-    if (OptForSize)
+    if (llvm::shouldOptimizeForSize(&MBB, PSI, MBFI))
       Changed |= removeRedundantAddrCalc(LEAs);
   }
 
diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp
index bb59cee8badba7f..50d63e196d1d0cd 100644
--- a/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -132,9 +132,7 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
     MachineBasicBlock *MBB = ReturnBB.first;
     unsigned Cycles = ReturnBB.second;
 
-    // Function::hasOptSize is already checked above.
-    bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);
-    if (OptForSize)
+    if (llvm::shouldOptimizeForSize(MBB, PSI, MBFI))
       continue;
 
     if (Cycles < Threshold) {
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index c30e989cdc2af19..38d8d19091e0fdc 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -220,6 +220,22 @@ def AlderLakePfmCounters : ProcPfmCounters {
 }
 def : PfmCountersBinding<"alderlake", AlderLakePfmCounters>;
 
+def SapphireRapidsPfmCounters : ProcPfmCounters { // Bound to "sapphirerapids" below.
+  let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+  let UopsCounter = UopsIssuedPfmCounter;
+  let IssueCounters = [ // Each entry: <SPR port or port-group name, counter name>.
+    PfmIssueCounter<"SPRPort00", "uops_dispatched_port:port_0">,
+    PfmIssueCounter<"SPRPort01", "uops_dispatched_port:port_1">,
+    PfmIssueCounter<"SPRPort02_03_10", "uops_dispatched_port:port_2_3_10">,
+    PfmIssueCounter<"SPRPort04_09", "uops_dispatched_port:port_4_9">,
+    PfmIssueCounter<"SPRPort05_11", "uops_dispatched_port:port_5_11">,
+    PfmIssueCounter<"SPRPort06", "uops_dispatched_port:port_6">,
+    PfmIssueCounter<"SPRPort07_08", "uops_dispatched_port:port_7_8">,
+  ];
+  let ValidationCounters = DefaultIntelPfmValidationCounters;
+}
+def : PfmCountersBinding<"sapphirerapids", SapphireRapidsPfmCounters>;
+
 // AMD X86 Counters.
 defvar DefaultAMDPfmValidationCounters = [
   PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">,
diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
index 6e292da4e293dbd..b0ebe70c31fd449 100644
--- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td
+++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -59,6 +59,8 @@ def SPRPort01_05          : ProcResGroup<[SPRPort01, SPRPort05]>;
 def SPRPort01_05_10       : ProcResGroup<[SPRPort01, SPRPort05, SPRPort10]>;
 def SPRPort02_03          : ProcResGroup<[SPRPort02, SPRPort03]>;
 def SPRPort02_03_11       : ProcResGroup<[SPRPort02, SPRPort03, SPRPort11]>;
+def SPRPort02_03_10       : ProcResGroup<[SPRPort02, SPRPort03, SPRPort10]>;
+def SPRPort05_11          : ProcResGroup<[SPRPort05, SPRPort11]>;
 def SPRPort07_08          : ProcResGroup<[SPRPort07, SPRPort08]>;
 
 // EU has 112 reservation stations.
@@ -78,6 +80,10 @@ def SPRPort02_03_07_08_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort07,
   let BufferSize = 72;
 }
 
+def SPRPortAny : ProcResGroup<[SPRPort00, SPRPort01, SPRPort02, SPRPort03,
+                               SPRPort04, SPRPort05, SPRPort06, SPRPort07,
+                               SPRPort08, SPRPort09, SPRPort10, SPRPort11]>;
+
 // Integer loads are 5 cycles, so ReadAfterLd registers needn't be available
 // until 5 cycles after the memory operand.
 def : ReadAdvance<ReadAfterLd, 5>;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 413ef0136d5c06f..520284d1d7a4887 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2296,7 +2296,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
     { ISD::FP_EXTEND, MVT::v8f64,   MVT::v8f32,   { 1, 1, 1, 1 } },
     { ISD::FP_EXTEND, MVT::v8f64,   MVT::v16f32,  { 3, 1, 1, 1 } },
     { ISD::FP_EXTEND, MVT::v16f64,  MVT::v16f32,  { 4, 1, 1, 1 } }, // 2*vcvtps2pd+vextractf64x4
+    { ISD::FP_EXTEND, MVT::v16f32,  MVT::v16f16,  { 1, 1, 1, 1 } }, // vcvtph2ps
+    { ISD::FP_EXTEND, MVT::v8f64,   MVT::v8f16,   { 2, 1, 1, 1 } }, // vcvtph2ps+vcvtps2pd
     { ISD::FP_ROUND,  MVT::v8f32,   MVT::v8f64,   { 1, 1, 1, 1 } },
+    { ISD::FP_ROUND,  MVT::v16f16,  MVT::v16f32,  { 1, 1, 1, 1 } }, // vcvtps2ph
 
     { ISD::TRUNCATE,  MVT::v2i1,    MVT::v2i8,    { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
     { ISD::TRUNCATE,  MVT::v4i1,    MVT::v4i8,    { 3, 1, 1, 1 } }, // sext+vpslld+vptestmd
@@ -2973,6 +2976,17 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
     { ISD::TRUNCATE,    MVT::v4i32,  MVT::v2i64,  { 1, 1, 1, 1 } }, // PSHUFD
   };
 
+  static const TypeConversionCostKindTblEntry F16ConversionTbl[] = {
+    { ISD::FP_ROUND,  MVT::f16,     MVT::f32,     { 1, 1, 1, 1 } },
+    { ISD::FP_ROUND,  MVT::v8f16,   MVT::v8f32,   { 1, 1, 1, 1 } },
+    { ISD::FP_ROUND,  MVT::v4f16,   MVT::v4f32,   { 1, 1, 1, 1 } },
+    { ISD::FP_EXTEND, MVT::f32,     MVT::f16,     { 1, 1, 1, 1 } },
+    { ISD::FP_EXTEND, MVT::f64,     MVT::f16,     { 2, 1, 1, 1 } }, // vcvtph2ps+vcvtps2pd
+    { ISD::FP_EXTEND, MVT::v8f32,   MVT::v8f16,   { 1, 1, 1, 1 } },
+    { ISD::FP_EXTEND, MVT::v4f32,   MVT::v4f16,   { 1, 1, 1, 1 } },
+    { ISD::FP_EXTEND, MVT::v4f64,   MVT::v4f16,   { 2, 1, 1, 1 } }, // vcvtph2ps+vcvtps2pd
+  };
+
   // Attempt to map directly to (simple) MVT types to let us match custom entries.
   EVT SrcTy = TLI->getValueType(DL, Src);
   EVT DstTy = TLI->getValueType(DL, Dst);
@@ -3034,6 +3048,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
           return *KindCost;
     }
 
+    if (ST->hasF16C()) {
+      if (const auto *Entry = ConvertCostTableLookup(F16ConversionTbl, ISD,
+                                                     SimpleDstTy, SimpleSrcTy))
+        if (auto KindCost = Entry->Cost[CostKind])
+          return *KindCost;
+    }
+
     if (ST->hasSSE41()) {
       if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
                                                      SimpleDstTy, SimpleSrcTy))
@@ -3047,6 +3068,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
         if (auto KindCost = Entry->Cost[CostKind])
           return *KindCost;
     }
+
+    if ((ISD == ISD::FP_ROUND && SimpleDstTy == MVT::f16) ||
+        (ISD == ISD::FP_EXTEND && SimpleSrcTy == MVT::f16)) {
+      // fp16 conversions not covered by any table entries require a libcall.
+      // Return a large (arbitrary) number to model this.
+      return InstructionCost(64);
+    }
   }
 
   // Fall back to legalized types.
@@ -3107,6 +3135,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
       if (auto KindCost = Entry->Cost[CostKind])
         return std::max(LTSrc.first, LTDest.first) * *KindCost;
 
+  if (ST->hasF16C()) {
+    if (const auto *Entry = ConvertCostTableLookup(F16ConversionTbl, ISD,
+                                                   LTDest.second, LTSrc.second))
+      if (auto KindCost = Entry->Cost[CostKind])
+        return std::max(LTSrc.first, LTDest.first) * *KindCost;
+  }
+
   if (ST->hasSSE41())
     if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD,
                                                    LTDest.second, LTSrc.second))
@@ -6923,6 +6958,14 @@ bool X86TTIImpl::isVectorShiftByScalarCheap(Type *Ty) const {
   return true;
 }
 
+/// Minimum store VF: 4 for half elements when F16C is available, else BaseT's.
+unsigned X86TTIImpl::getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
+                                       Type *ScalarValTy) const {
+  if (!(ST->hasF16C() && ScalarMemTy->isHalfTy()))
+    return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
+  return 4;
+}
+
 bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
                                             SmallVectorImpl<Use *> &Ops) const {
   using namespace llvm::PatternMatch;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 0100f328ab4bd30..36d00cee0d18b57 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -302,6 +302,9 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
 
   bool isVectorShiftByScalarCheap(Type *Ty) const;
 
+  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
+                             Type *ScalarValTy) const;
+
 private:
   bool supportsGather() const;
   InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/X86/X86WinEHState.cpp b/llvm/lib/Target/X86/X86WinEHState.cpp
index bc9fd801f94b224..ef2127367301143 100644
--- a/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -210,13 +210,11 @@ Type *WinEHStatePass::getEHLinkRegistrationType() {
   if (EHLinkRegistrationTy)
     return EHLinkRegistrationTy;
   LLVMContext &Context = TheModule->getContext();
-  EHLinkRegistrationTy = StructType::create(Context, "EHRegistrationNode");
   Type *FieldTys[] = {
-      PointerType::getUnqual(
-          EHLinkRegistrationTy->getContext()), // EHRegistrationNode *Next
-      PointerType::getUnqual(Context) // EXCEPTION_DISPOSITION (*Handler)(...)
+      PointerType::getUnqual(Context), // EHRegistrationNode *Next
+      PointerType::getUnqual(Context)  // EXCEPTION_DISPOSITION (*Handler)(...)
   };
-  EHLinkRegistrationTy->setBody(FieldTys, false);
+  EHLinkRegistrationTy = StructType::create(FieldTys, "EHRegistrationNode");
   return EHLinkRegistrationTy;
 }
 
diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp
index eda90d3101ab481..707c4a790872805 100644
--- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp
+++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
 
 #include "XCoreGenAsmWriter.inc"
 
-void XCoreInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
+void XCoreInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) {
   OS << StringRef(getRegisterName(Reg)).lower();
 }
 
diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
index 916ca99968fbb0c..2b47de457322ee2 100644
--- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
+++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h
@@ -31,7 +31,7 @@ class XCoreInstPrinter : public MCInstPrinter {
   void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
   static const char *getRegisterName(MCRegister Reg);
 
-  void printRegName(raw_ostream &OS, MCRegister Reg) const override;
+  void printRegName(raw_ostream &OS, MCRegister Reg) override;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
 
diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp
index fe1dc0e2e483e72..e04d7bd211216f2 100644
--- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp
+++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp
@@ -74,7 +74,7 @@ void XtensaInstPrinter::printInst(const MCInst *MI, uint64_t Address,
   printAnnotation(O, Annot);
 }
 
-void XtensaInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const {
+void XtensaInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) {
   O << getRegisterName(Reg);
 }
 
diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h
index 46a35ae6f4e3fad..4122b1ff2310b71 100644
--- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h
+++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h
@@ -36,7 +36,7 @@ class XtensaInstPrinter : public MCInstPrinter {
   static void printOperand(const MCOperand &MO, raw_ostream &O);
 
   // Override MCInstPrinter.
-  void printRegName(raw_ostream &O, MCRegister Reg) const override;
+  void printRegName(raw_ostream &O, MCRegister Reg) override;
   void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
                  const MCSubtargetInfo &STI, raw_ostream &O) override;
 
diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp
index caa5a97747ee57b..de5b5c39c9ed271 100644
--- a/llvm/lib/TargetParser/RISCVISAInfo.cpp
+++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp
@@ -751,17 +751,6 @@ Error RISCVISAInfo::checkDependency() {
   if (HasZvl && !HasVector)
     return getExtensionRequiresError("zvl*b", "v' or 'zve*");
 
-  if (!HasVector)
-    for (auto Ext :
-         {"zvbb", "zvbc32e", "zvkb", "zvkg", "zvkgs", "zvkned", "zvknha", "zvksed", "zvksh"})
-      if (Exts.count(Ext))
-        return getExtensionRequiresError(Ext, "v' or 'zve*");
-
-  if (!Exts.count("zve64x"))
-    for (auto Ext : {"zvknhb", "zvbc"})
-      if (Exts.count(Ext))
-        return getExtensionRequiresError(Ext, "v' or 'zve64*");
-
   if ((HasZcmt || Exts.count("zcmp")) && HasD && (HasC || Exts.count("zcd")))
     return getError(Twine("'") + (HasZcmt ? "zcmt" : "zcmp") +
                     "' extension is incompatible with '" +
diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index a3674306f3e10e6..5375448d2d2e2b8 100644
--- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -123,11 +123,11 @@ void Lowerer::lowerCoroNoop(IntrinsicInst *II) {
     Module &M = *II->getModule();
 
     // Create a noop.frame struct type.
-    StructType *FrameTy = StructType::create(C, "NoopCoro.Frame");
     auto *FnTy = FunctionType::get(Type::getVoidTy(C), Builder.getPtrTy(0),
                                    /*isVarArg=*/false);
     auto *FnPtrTy = Builder.getPtrTy(0);
-    FrameTy->setBody({FnPtrTy, FnPtrTy});
+    StructType *FrameTy =
+        StructType::create({FnPtrTy, FnPtrTy}, "NoopCoro.Frame");
 
     // Create a Noop function that does nothing.
     Function *NoopFn =
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 021fcc20c1f18bb..bb6126026d90581 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -290,8 +290,8 @@ class FrameTypeBuilder {
     return Fields.size() - 1;
   }
 
-  /// Finish the layout and set the body on the given type.
-  void finish(StructType *Ty);
+  /// Finish the layout and create the struct type with the given name.
+  StructType *finish(StringRef Name);
 
   uint64_t getStructSize() const {
     assert(IsFinished && "not yet finished!");
@@ -464,7 +464,7 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
   });
 }
 
-void FrameTypeBuilder::finish(StructType *Ty) {
+StructType *FrameTypeBuilder::finish(StringRef Name) {
   assert(!IsFinished && "already finished!");
 
   // Prepare the optimal-layout field array.
@@ -526,7 +526,7 @@ void FrameTypeBuilder::finish(StructType *Ty) {
     LastOffset = Offset + F.Size;
   }
 
-  Ty->setBody(FieldTypes, Packed);
+  StructType *Ty = StructType::create(Context, FieldTypes, Name, Packed);
 
 #ifndef NDEBUG
   // Check that the IR layout matches the offsets we expect.
@@ -538,6 +538,8 @@ void FrameTypeBuilder::finish(StructType *Ty) {
 #endif
 
   IsFinished = true;
+
+  return Ty;
 }
 
 static void cacheDIVar(FrameDataInfo &FrameData,
@@ -866,11 +868,6 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
                                   bool OptimizeFrame) {
   LLVMContext &C = F.getContext();
   const DataLayout &DL = F.getDataLayout();
-  StructType *FrameTy = [&] {
-    SmallString<32> Name(F.getName());
-    Name.append(".Frame");
-    return StructType::create(C, Name);
-  }();
 
   // We will use this value to cap the alignment of spilled values.
   std::optional<Align> MaxFrameAlignment;
@@ -931,7 +928,12 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
     FrameData.setFieldIndex(S.first, Id);
   }
 
-  B.finish(FrameTy);
+  StructType *FrameTy = [&] {
+    SmallString<32> Name(F.getName());
+    Name.append(".Frame");
+    return B.finish(Name);
+  }();
+
   FrameData.updateLayoutIndex(B);
   Shape.FrameAlign = B.getStructAlign();
   Shape.FrameSize = B.getStructSize();
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 20249a20a37e413..919d3143a13f7e7 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -84,14 +84,11 @@ static cl::opt<bool> SpecializeOnAddress(
     "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
     "Enable function specialization on the address of global values"));
 
-// Disabled by default as it can significantly increase compilation times.
-//
-// https://llvm-compile-time-tracker.com
-// https://github.com/nikic/llvm-compile-time-tracker
 static cl::opt<bool> SpecializeLiteralConstant(
-    "funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc(
-    "Enable specialization of functions that take a literal constant as an "
-    "argument"));
+    "funcspec-for-literal-constant", cl::init(true), cl::Hidden,
+    cl::desc(
+        "Enable specialization of functions that take a literal constant as an "
+        "argument"));
 
 bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
                                          DenseSet<BasicBlock *> &DeadBlocks) {
@@ -646,6 +643,18 @@ FunctionSpecializer::~FunctionSpecializer() {
   cleanUpSSA();
 }
 
+/// Convert a Cost to an unsigned value. The result of getValue() is
+/// dereferenced without a validity check, so the Cost must be Valid; it is also
+/// expected to be non-negative, as for TCK_CodeSize and TCK_Latency costs.
+static unsigned getCostValue(const Cost &C) {
+  int64_t Value = *C.getValue();
+
+  assert(Value >= 0 && "CodeSize and Latency cannot be negative");
+  // Narrowing from int64_t: the assert above rules out negative values only.
+  // NOTE(review): a value above UINT_MAX would be truncated here -- confirm.
+  return static_cast<unsigned>(Value);
+}
+
 /// Attempt to specialize functions in the module to enable constant
 /// propagation across function boundaries.
 ///
@@ -682,10 +691,11 @@ bool FunctionSpecializer::run() {
         (RequireMinSize && Metrics.NumInsts < MinFunctionSize))
       continue;
 
-    // TODO: For now only consider recursive functions when running multiple
-    // times. This should change if specialization on literal constants gets
-    // enabled.
-    if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant)
+    // When specialization on literal constants is disabled, only consider
+    // recursive functions when running multiple times to save wasted analysis,
+    // as we will not be able to specialize on any newly found literal constant
+    // return values.
+    if (!SpecializeLiteralConstant && !Inserted && !Metrics.isRecursive)
       continue;
 
     int64_t Sz = *Metrics.NumInsts.getValue();
@@ -759,6 +769,11 @@ bool FunctionSpecializer::run() {
   SmallVector<Function *> Clones;
   for (unsigned I = 0; I < NSpecs; ++I) {
     Spec &S = AllSpecs[BestSpecs[I]];
+
+    // Accumulate the codesize growth for the function now that the
+    // specialization is actually being created.
+    FunctionGrowth[S.F] += S.CodeSize;
+
     S.Clone = createSpecialization(S.F, S.Sig);
 
     // Update the known call sites to call the clone.
@@ -837,18 +852,6 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
   return Clone;
 }
 
-/// Get the unsigned Value of given Cost object. Assumes the Cost is always
-/// non-negative, which is true for both TCK_CodeSize and TCK_Latency, and
-/// always Valid.
-static unsigned getCostValue(const Cost &C) {
-  int64_t Value = *C.getValue();
-
-  assert(Value >= 0 && "CodeSize and Latency cannot be negative");
-  // It is safe to down cast since we know the arguments cannot be negative and
-  // Cost is of type int64_t.
-  return static_cast<unsigned>(Value);
-}
-
 bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
                                               SmallVectorImpl<Spec> &AllSpecs,
                                               SpecMap &SM) {
@@ -924,16 +927,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
       }
       CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs();
 
+      unsigned CodeSizeSavings = getCostValue(CodeSize);
+      unsigned SpecSize = FuncSize - CodeSizeSavings;
+
       auto IsProfitable = [&]() -> bool {
         // No check required.
         if (ForceSpecialization)
           return true;
 
-        unsigned CodeSizeSavings = getCostValue(CodeSize);
-        // TODO: We should only accumulate codesize increase of specializations
-        // that are actually created.
-        FunctionGrowth[F] += FuncSize - CodeSizeSavings;
-
         LLVM_DEBUG(
             dbgs() << "FnSpecialization: Specialization bonus {Inlining = "
                    << Score << " (" << (Score * 100 / FuncSize) << "%)}\n");
@@ -964,7 +965,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
         if (LatencySavings < MinLatencySavings * FuncSize / 100)
           return false;
         // Maximum codesize growth.
-        if (FunctionGrowth[F] / FuncSize > MaxCodeSizeGrowth)
+        if ((FunctionGrowth[F] + SpecSize) / FuncSize > MaxCodeSizeGrowth)
           return false;
 
         Score += std::max(CodeSizeSavings, LatencySavings);
@@ -976,7 +977,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
         continue;
 
       // Create a new specialisation entry.
-      auto &Spec = AllSpecs.emplace_back(F, S, Score);
+      auto &Spec = AllSpecs.emplace_back(F, S, Score, SpecSize);
       if (CS.getFunction() != F)
         Spec.CallSites.push_back(&CS);
       const unsigned Index = AllSpecs.size() - 1;
@@ -1001,8 +1002,7 @@ bool FunctionSpecializer::isCandidateFunction(Function *F) {
     return false;
 
   // If we're optimizing the function for size, we shouldn't specialize it.
-  if (F->hasOptSize() ||
-      shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass))
+  if (shouldOptimizeForSize(F, nullptr, nullptr, PGSOQueryType::IRPass))
     return false;
 
   // Exit if the function is not executable. There's no point in specializing
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 4efd683dfca3633..da5ded23ecc0453 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -928,8 +928,11 @@ bool allocTypesMatch(
     const std::vector<uint8_t> &InAllocTypes,
     const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
         &Edges) {
+  // This should be called only when the InAllocTypes vector was computed for
+  // this set of Edges. Make sure the sizes are the same.
+  assert(InAllocTypes.size() == Edges.size());
   return std::equal(
-      InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(),
+      InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(), Edges.end(),
       [](const uint8_t &l,
          const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
         // Can share if one of the edges is None type - don't
@@ -942,6 +945,46 @@ bool allocTypesMatch(
       });
 }
 
+// Checks whether the callee edges of Clone are compatible with the alloc types
+// recorded in InAllocTypes. InAllocTypes was computed from the callee edges of
+// the original node (Clone->CloneOf), and further cloning may have happened
+// since Clone was created, so each entry is matched to a clone callee edge by
+// callee node; that edge may or may not exist.
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+bool allocTypesMatchClone(
+    const std::vector<uint8_t> &InAllocTypes,
+    const ContextNode<DerivedCCG, FuncTy, CallTy> *Clone) {
+  using NodeTy = ContextNode<DerivedCCG, FuncTy, CallTy>;
+  const NodeTy *OrigNode = Clone->CloneOf;
+  assert(OrigNode);
+  // One recorded alloc type is expected per callee edge of the original node.
+  assert(InAllocTypes.size() == OrigNode->CalleeEdges.size());
+
+  // Index the clone's callee edge alloc types by callee node.
+  DenseMap<const NodeTy *, uint8_t> CloneTypeByCallee;
+  for (const auto &CloneEdge : Clone->CalleeEdges) {
+    assert(!CloneTypeByCallee.contains(CloneEdge->Callee));
+    CloneTypeByCallee[CloneEdge->Callee] = CloneEdge->AllocTypes;
+  }
+
+  // Check each original callee edge against the clone's edge to that callee.
+  const uint8_t NoneTy = (uint8_t)AllocationType::None;
+  for (unsigned Idx = 0; Idx < OrigNode->CalleeEdges.size(); Idx++) {
+    auto Found = CloneTypeByCallee.find(OrigNode->CalleeEdges[Idx]->Callee);
+    // A missing clone edge is fine: one is simply added if this clone is used.
+    if (Found == CloneTypeByCallee.end())
+      continue;
+    // A None type on either side means the edge does not exist for those
+    // context ids, so its type does not matter.
+    if (InAllocTypes[Idx] == NoneTy || Found->second == NoneTy)
+      continue;
+    if (allocTypeToUse(Found->second) != allocTypeToUse(InAllocTypes[Idx]))
+      return false;
+  }
+  // No conflicting alloc type was found.
+  return true;
+}
+
 } // end anonymous namespace
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
@@ -1352,6 +1395,17 @@ static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
     }
     assert(NodeContextIds == CalleeEdgeContextIds);
   }
+  // FIXME: Since this checking is only invoked under an option, we should
+  // change the error checking from using assert to something that will trigger
+  // an error on a release build.
+#ifndef NDEBUG
+  // Make sure we don't end up with duplicate edges between the same caller and
+  // callee.
+  DenseSet<ContextNode<DerivedCCG, FuncTy, CallTy> *> NodeSet;
+  for (const auto &E : Node->CalleeEdges)
+    NodeSet.insert(E->Callee);
+  assert(NodeSet.size() == Node->CalleeEdges.size());
+#endif
 }
 
 template <typename DerivedCCG, typename FuncTy, typename CallTy>
@@ -3125,7 +3179,15 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
     // from the same callers as the old node. That should be true in the current
     // use case, where we will remove None-type edges after copying over all
     // caller edges from the callee.
-    assert(IsNewNode || NewCaller->findEdgeFromCaller(OldCallerEdge->Caller));
+    auto *ExistingCallerEdge =
+        NewCaller->findEdgeFromCaller(OldCallerEdge->Caller);
+    assert(IsNewNode || ExistingCallerEdge);
+    if (ExistingCallerEdge) {
+      ExistingCallerEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
+                                                 EdgeContextIdsToMove.end());
+      ExistingCallerEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
+      continue;
+    }
     auto NewEdge = std::make_shared<ContextEdge>(
         NewCaller, OldCallerEdge->Caller,
         computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
@@ -3345,11 +3407,22 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
           allocTypeToUse(CallerAllocTypeForAlloc))
         continue;
 
-      if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
-              CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
-        continue;
-      Clone = CurClone;
-      break;
+      bool BothSingleAlloc = hasSingleAllocType(CurClone->AllocTypes) &&
+                             hasSingleAllocType(CallerAllocTypeForAlloc);
+      // Given the check above, if both have a single alloc type then those
+      // types must be equal.
+      assert(!BothSingleAlloc ||
+             CurClone->AllocTypes == CallerAllocTypeForAlloc);
+
+      // If either both have a single alloc type (which are the same), or if the
+      // clone's callee edges have the same alloc types as those for the current
+      // allocation on Node's callee edges (CalleeEdgeAllocTypesForCallerEdge),
+      // then we can reuse this clone.
+      if (BothSingleAlloc || allocTypesMatchClone<DerivedCCG, FuncTy, CallTy>(
+                                 CalleeEdgeAllocTypesForCallerEdge, CurClone)) {
+        Clone = CurClone;
+        break;
+      }
     }
 
     // The edge iterator is adjusted when we move the CallerEdge to the clone.
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index b50a700e09038f1..ad16b0b3501495e 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -172,14 +172,14 @@ namespace {
 
 class FunctionNode {
   mutable AssertingVH<Function> F;
-  IRHash Hash;
+  stable_hash Hash;
 
 public:
   // Note the hash is recalculated potentially multiple times, but it is cheap.
   FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}
 
   Function *getFunc() const { return F; }
-  IRHash getHash() const { return Hash; }
+  stable_hash getHash() const { return Hash; }
 
   /// Replace the reference to the function F by the function G, assuming their
   /// implementations are equal.
@@ -420,7 +420,7 @@ bool MergeFunctions::runOnModule(Module &M) {
 
   // All functions in the module, ordered by hash. Functions with a unique
   // hash value are easily eliminated.
-  std::vector<std::pair<IRHash, Function *>> HashedFuncs;
+  std::vector<std::pair<stable_hash, Function *>> HashedFuncs;
   for (Function &Func : M) {
     if (isEligibleForMerging(Func)) {
       HashedFuncs.push_back({StructuralHash(Func), &Func});
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 35664a5c7a2ac27..9e25620710fc84a 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -1093,6 +1093,7 @@ struct OpenMPOpt {
       CGStartBB->getTerminator()->setSuccessor(0, StartBB);
       assert(EndBB != nullptr && "EndBB should not be null");
       EndBB->getTerminator()->setSuccessor(0, CGEndBB);
+      return Error::success();
     };
 
     auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
@@ -1101,7 +1102,7 @@ struct OpenMPOpt {
       return CodeGenIP;
     };
 
-    auto FiniCB = [&](InsertPointTy CodeGenIP) {};
+    auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); };
 
     /// Create a sequential execution region within a merged parallel region,
     /// encapsulated in a master construct with a barrier for synchronization.
@@ -1132,8 +1133,9 @@ struct OpenMPOpt {
         CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
         assert(SeqEndBB != nullptr && "SeqEndBB should not be null");
         SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
+        return Error::success();
       };
-      auto FiniCB = [&](InsertPointTy CodeGenIP) {};
+      auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); };
 
       // Find outputs from the sequential region to outside users and
       // broadcast their values to them.
@@ -1176,12 +1178,15 @@ struct OpenMPOpt {
 
       OpenMPIRBuilder::LocationDescription Loc(
           InsertPointTy(ParentBB, ParentBB->end()), DL);
-      InsertPointTy SeqAfterIP =
+      OpenMPIRBuilder::InsertPointOrErrorTy SeqAfterIP =
           OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
+      assert(SeqAfterIP && "Unexpected error creating master");
 
-      OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
+      OpenMPIRBuilder::InsertPointOrErrorTy BarrierAfterIP =
+          OMPInfoCache.OMPBuilder.createBarrier(*SeqAfterIP, OMPD_parallel);
+      assert(BarrierAfterIP && "Unexpected error creating barrier");
 
-      BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
+      BranchInst::Create(SeqAfterBB, SeqAfterIP->getBlock());
 
       LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn
                         << "\n");
@@ -1251,10 +1256,12 @@ struct OpenMPOpt {
           OriginalFn->getEntryBlock().getFirstInsertionPt());
       // Create the merged parallel region with default proc binding, to
       // avoid overriding binding settings, and without explicit cancellation.
-      InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
-          Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
-          OMP_PROC_BIND_default, /* IsCancellable */ false);
-      BranchInst::Create(AfterBB, AfterIP.getBlock());
+      OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+          OMPInfoCache.OMPBuilder.createParallel(
+              Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
+              OMP_PROC_BIND_default, /* IsCancellable */ false);
+      assert(AfterIP && "Unexpected error creating parallel");
+      BranchInst::Create(AfterBB, AfterIP->getBlock());
 
       // Perform the actual outlining.
       OMPInfoCache.OMPBuilder.finalize(OriginalFn);
@@ -1290,10 +1297,12 @@ struct OpenMPOpt {
         if (CI != MergableCIs.back()) {
           // TODO: Remove barrier if the merged parallel region includes the
           // 'nowait' clause.
-          OMPInfoCache.OMPBuilder.createBarrier(
-              InsertPointTy(NewCI->getParent(),
-                            NewCI->getNextNode()->getIterator()),
-              OMPD_parallel);
+          OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+              OMPInfoCache.OMPBuilder.createBarrier(
+                  InsertPointTy(NewCI->getParent(),
+                                NewCI->getNextNode()->getIterator()),
+                  OMPD_parallel);
+          assert(AfterIP && "Unexpected error creating barrier");
         }
 
         CI->eraseFromParent();
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 338e9772c7cc088..6bb39cabb0988b9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1976,6 +1976,22 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
     return new ICmpInst(Pred, LShr, Constant::getNullValue(LShr->getType()));
   }
 
+  // (icmp eq/ne (and (add A, Addend), Msk), C)
+  //    -> (icmp eq/ne (and A, Msk), (and (sub C, Addend), Msk))
+  {
+    Value *A;
+    const APInt *Addend, *Msk;
+    if (match(And, m_And(m_OneUse(m_Add(m_Value(A), m_APInt(Addend))),
+                         m_APInt(Msk))) &&
+        Msk->isMask() && C.ule(*Msk)) {
+      APInt NewComperand = (C - *Addend) & *Msk;
+      Value* MaskA = Builder.CreateAnd(A, ConstantInt::get(A->getType(), *Msk));
+      return new ICmpInst(
+          Pred, MaskA,
+          Constant::getIntegerValue(MaskA->getType(), NewComperand));
+    }
+  }
+
   return nullptr;
 }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index f4f3644acfe5ea0..b9c165da906da4b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1427,6 +1427,18 @@ static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth,
     if (Value *LogX = takeLog2(Builder, X, Depth, AssumeNonZero, DoFold))
       return IfFold([&]() { return Builder.CreateZExt(LogX, Op->getType()); });
 
+  // log2(trunc X) -> trunc log2(X)
+  // FIXME: Require one use?
+  if (match(Op, m_Trunc(m_Value(X)))) {
+    auto *TI = cast<TruncInst>(Op);
+    if (AssumeNonZero || TI->hasNoUnsignedWrap())
+      if (Value *LogX = takeLog2(Builder, X, Depth, AssumeNonZero, DoFold))
+        return IfFold([&]() {
+          return Builder.CreateTrunc(LogX, Op->getType(), "",
+                                     /*IsNUW=*/TI->hasNoUnsignedWrap());
+        });
+  }
+
   // log2(X << Y) -> log2(X) + Y
   // FIXME: Require one use unless X is 1?
   if (match(Op, m_Shl(m_Value(X), m_Value(Y)))) {
@@ -1437,6 +1449,24 @@ static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth,
         return IfFold([&]() { return Builder.CreateAdd(LogX, Y); });
   }
 
+  // log2(X >>u Y) -> log2(X) - Y
+  // FIXME: Require one use?
+  if (match(Op, m_LShr(m_Value(X), m_Value(Y)))) {
+    auto *PEO = cast<PossiblyExactOperator>(Op);
+    if (AssumeNonZero || PEO->isExact())
+      if (Value *LogX = takeLog2(Builder, X, Depth, AssumeNonZero, DoFold))
+        return IfFold([&]() { return Builder.CreateSub(LogX, Y); });
+  }
+
+  // log2(X & Y) -> either log2(X) or log2(Y)
+  // This requires `AssumeNonZero` as `X & Y` may be zero when X != Y.
+  if (AssumeNonZero && match(Op, m_And(m_Value(X), m_Value(Y)))) {
+    if (Value *LogX = takeLog2(Builder, X, Depth, AssumeNonZero, DoFold))
+      return IfFold([&]() { return LogX; });
+    if (Value *LogY = takeLog2(Builder, Y, Depth, AssumeNonZero, DoFold))
+      return IfFold([&]() { return LogY; });
+  }
+
   // log2(Cond ? X : Y) -> Cond ? log2(X) : log2(Y)
   // FIXME: Require one use?
   if (SelectInst *SI = dyn_cast<SelectInst>(Op))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index d68ae64f08aa90a..454fe5a91d375a3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2900,6 +2900,21 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   if (Instruction *I = foldIdentityPaddedShuffles(SVI))
     return I;
 
+  if (match(RHS, m_Constant())) {
+    if (auto *SI = dyn_cast<SelectInst>(LHS)) {
+      // We cannot do this fold for elementwise select since ShuffleVector is
+      // not elementwise.
+      if (SI->getCondition()->getType()->isIntegerTy()) {
+        if (Instruction *I = FoldOpIntoSelect(SVI, SI))
+          return I;
+      }
+    }
+    if (auto *PN = dyn_cast<PHINode>(LHS)) {
+      if (Instruction *I = foldOpIntoPhi(SVI, PN))
+        return I;
+    }
+  }
+
   if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
     Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
     return replaceInstUsesWith(SVI, V);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index c8b9f166b160205..2a54390c0f1882d 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3753,7 +3753,9 @@ Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
   }
 
   // Replace all dominated uses of the condition with true/false
-  if (BI.getSuccessor(0) != BI.getSuccessor(1)) {
+  // Ignore constant expressions to avoid iterating over uses in other
+  // functions.
+  if (!isa<Constant>(Cond) && BI.getSuccessor(0) != BI.getSuccessor(1)) {
     for (auto &U : make_early_inc_range(Cond->uses())) {
       BasicBlockEdge Edge0(BI.getParent(), BI.getSuccessor(0));
       if (DT.dominates(Edge0, U)) {
@@ -4087,7 +4089,7 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
   if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
     // Bail out if the aggregate contains scalable vector type
     if (auto *STy = dyn_cast<StructType>(Agg->getType());
-        STy && STy->containsScalableVectorType())
+        STy && STy->isScalableTy())
       return nullptr;
 
     // If the (non-volatile) load only has one use, we can rewrite this to a
@@ -5462,6 +5464,8 @@ static bool combineInstructionsOverFunction(
     BranchProbabilityInfo *BPI, ProfileSummaryInfo *PSI,
     const InstCombineOptions &Opts) {
   auto &DL = F.getDataLayout();
+  bool VerifyFixpoint = Opts.VerifyFixpoint &&
+                        !F.hasFnAttribute("instcombine-no-verify-fixpoint");
 
   /// Builder - This is an IRBuilder that automatically inserts new
   /// instructions into the worklist when they are created.
@@ -5486,7 +5490,7 @@ static bool combineInstructionsOverFunction(
   while (true) {
     ++Iteration;
 
-    if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) {
+    if (Iteration > Opts.MaxIterations && !VerifyFixpoint) {
       LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations
                         << " on " << F.getName()
                         << " reached; stopping without verifying fixpoint\n");
@@ -5508,9 +5512,11 @@ static bool combineInstructionsOverFunction(
     MadeIRChange = true;
     if (Iteration > Opts.MaxIterations) {
       report_fatal_error(
-          "Instruction Combining did not reach a fixpoint after " +
-              Twine(Opts.MaxIterations) + " iterations. " +
-              "Use 'instcombine<no-verify-fixpoint>' to suppress this error.",
+          "Instruction Combining on " + Twine(F.getName()) +
+              " did not reach a fixpoint after " + Twine(Opts.MaxIterations) +
+              " iterations. " +
+              "Use 'instcombine<no-verify-fixpoint>' or function attribute "
+              "'instcombine-no-verify-fixpoint' to suppress this error.",
           /*GenCrashDiag=*/false);
     }
   }
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index bceb6135cc1f926..4d8141431a0c191 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -319,6 +319,20 @@ static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
     cl::desc("Do not instrument functions with the number of critical edges "
              " greater than this threshold."));
 
+static cl::opt<uint64_t> PGOColdInstrumentEntryThreshold(
+    "pgo-cold-instrument-entry-threshold", cl::init(0), cl::Hidden,
+    cl::desc("For cold function instrumentation, skip instrumenting functions "
+             "whose entry count is above the given value."));
+
+static cl::opt<bool> PGOTreatUnknownAsCold(
+    "pgo-treat-unknown-as-cold", cl::init(false), cl::Hidden,
+    cl::desc("For cold function instrumentation, treat count unknown(e.g. "
+             "unprofiled) functions as cold."));
+
+cl::opt<bool> PGOInstrumentColdFunctionOnly(
+    "pgo-instrument-cold-function-only", cl::init(false), cl::Hidden,
+    cl::desc("Enable cold function only instrumentation."));
+
 extern cl::opt<unsigned> MaxNumVTableAnnotations;
 
 namespace llvm {
@@ -1897,6 +1911,11 @@ static bool skipPGOGen(const Function &F) {
     return true;
   if (F.getInstructionCount() < PGOFunctionSizeThreshold)
     return true;
+  if (PGOInstrumentColdFunctionOnly) {
+    if (auto EntryCount = F.getEntryCount())
+      return EntryCount->getCount() > PGOColdInstrumentEntryThreshold;
+    return !PGOTreatUnknownAsCold;
+  }
   return false;
 }
 
diff --git a/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp
index c4cb72ab2e4da90..88cb04695217d50 100644
--- a/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/RealtimeSanitizer.cpp
@@ -69,7 +69,7 @@ static PreservedAnalyses runSanitizeRealtime(Function &Fn) {
   return rtsanPreservedCFGAnalyses();
 }
 
-static PreservedAnalyses runSanitizeRealtimeUnsafe(Function &Fn) {
+static PreservedAnalyses runSanitizeRealtimeBlocking(Function &Fn) {
   IRBuilder<> Builder(&Fn.front().front());
   Value *Name = Builder.CreateGlobalString(demangle(Fn.getName()));
   insertCallAtFunctionEntryPoint(Fn, "__rtsan_notify_blocking_call", {Name});
@@ -84,8 +84,8 @@ PreservedAnalyses RealtimeSanitizerPass::run(Function &Fn,
   if (Fn.hasFnAttribute(Attribute::SanitizeRealtime))
     return runSanitizeRealtime(Fn);
 
-  if (Fn.hasFnAttribute(Attribute::SanitizeRealtimeUnsafe))
-    return runSanitizeRealtimeUnsafe(Fn);
+  if (Fn.hasFnAttribute(Attribute::SanitizeRealtimeBlocking))
+    return runSanitizeRealtimeBlocking(Fn);
 
   return PreservedAnalyses::all();
 }
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 4a6dedc93d30650..9b913e5c2a04a53 100644
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -953,8 +953,7 @@ bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI,
   this->Ctx = &Fn.getContext();
   this->Entry = &Entry;
   this->PSI = PSI;
-  this->OptForSize = Entry.getParent()->hasOptSize() ||
-                     llvm::shouldOptimizeForSize(Entry.getParent(), PSI, BFI,
+  this->OptForSize = llvm::shouldOptimizeForSize(Entry.getParent(), PSI, BFI,
                                                  PGSOQueryType::IRPass);
 
   // Collect all constant candidates.
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index db82f75bad5f34c..9b4a19106d394b9 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -586,11 +586,8 @@ class LoadEliminationForLoop {
       }
 
       auto *HeaderBB = L->getHeader();
-      auto *F = HeaderBB->getParent();
-      bool OptForSize = F->hasOptSize() ||
-                        llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI,
-                                                    PGSOQueryType::IRPass);
-      if (OptForSize) {
+      if (llvm::shouldOptimizeForSize(HeaderBB, PSI, BFI,
+                                      PGSOQueryType::IRPass)) {
         LLVM_DEBUG(
             dbgs() << "Versioning is needed but not allowed when optimizing "
                       "for size.\n");
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 13323604eb514a8..5fd4fd78c28a953 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -1179,6 +1179,9 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
   case LibFunc_erf:
   case LibFunc_erff:
   case LibFunc_erfl:
+  case LibFunc_tgamma:
+  case LibFunc_tgammaf:
+  case LibFunc_tgammal:
   case LibFunc_exp:
   case LibFunc_expf:
   case LibFunc_expl:
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 15b26a38cc28ef2..ed4ad15e5ab6952 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -953,7 +953,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
       case Attribute::SanitizeHWAddress:
       case Attribute::SanitizeMemTag:
       case Attribute::SanitizeRealtime:
-      case Attribute::SanitizeRealtimeUnsafe:
+      case Attribute::SanitizeRealtimeBlocking:
       case Attribute::SpeculativeLoadHardening:
       case Attribute::StackProtect:
       case Attribute::StackProtectReq:
diff --git a/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 3a33b591d355826..6337913cdbbeb77 100644
--- a/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -55,8 +55,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
     for (unsigned i = 0; i < CBI->getNumSuccessors(); i++) {
       auto *Succ = CBI->getSuccessor(i);
       if (!Succ->getSinglePredecessor()) {
-        assert(isCriticalEdge(II, i) && "Expected a critical edge!");
-        [[maybe_unused]] BasicBlock *BB = SplitCriticalEdge(II, i);
+        assert(isCriticalEdge(CBI, i) && "Expected a critical edge!");
+        [[maybe_unused]] BasicBlock *BB = SplitCriticalEdge(CBI, i);
         assert(BB && "Unable to split critical edge.");
       }
     }
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 79e91ad097cf00e..d85e0d994660221 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1413,8 +1413,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
     return nullptr;
   }
 
-  bool OptForSize = CI->getFunction()->hasOptSize() ||
-                    llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
+  bool OptForSize = llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
                                                 PGSOQueryType::IRPass);
 
   // If the char is variable but the input str and length are not we can turn
@@ -3482,10 +3481,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
       return B.CreateIntCast(PtrDiff, CI->getType(), false);
     }
 
-    bool OptForSize = CI->getFunction()->hasOptSize() ||
-                      llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
-                                                  PGSOQueryType::IRPass);
-    if (OptForSize)
+    if (llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
+                                    PGSOQueryType::IRPass))
       return nullptr;
 
     Value *Len = emitStrLen(CI->getArgOperand(2), B, DL, TLI);
@@ -3795,10 +3792,8 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilderBase &B) {
 
   // Don't rewrite fputs to fwrite when optimising for size because fwrite
   // requires more arguments and thus extra MOVs are required.
-  bool OptForSize = CI->getFunction()->hasOptSize() ||
-                    llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
-                                                PGSOQueryType::IRPass);
-  if (OptForSize)
+  if (llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
+                                  PGSOQueryType::IRPass))
     return nullptr;
 
   // We can't optimize if return value is used.
diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp
index 09c4c1c3c511ff8..7c95e7e6b996b4f 100644
--- a/llvm/lib/Transforms/Utils/SizeOpts.cpp
+++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp
@@ -99,6 +99,8 @@ struct BasicBlockBFIAdapter {
 bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
                                  BlockFrequencyInfo *BFI,
                                  PGSOQueryType QueryType) {
+  if (F->hasOptSize())
+    return true;
   return shouldFuncOptimizeForSizeImpl(F, PSI, BFI, QueryType);
 }
 
@@ -106,5 +108,7 @@ bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
                                  BlockFrequencyInfo *BFI,
                                  PGSOQueryType QueryType) {
   assert(BB);
+  if (BB->getParent()->hasOptSize())
+    return true;
   return shouldOptimizeForSizeImpl(BB, PSI, BFI, QueryType);
 }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 43be72f0f34d45d..f1568781252c060 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -460,11 +460,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
   const auto &Strides =
     LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();
 
-  Function *F = TheLoop->getHeader()->getParent();
-  bool OptForSize = F->hasOptSize() ||
-                    llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
-                                                PGSOQueryType::IRPass);
-  bool CanAddPredicate = !OptForSize;
+  bool CanAddPredicate = !llvm::shouldOptimizeForSize(
+      TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
   int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
                             CanAddPredicate, false).value_or(0);
   if (Stride == 1 || Stride == -1)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 1c8d541ef2c51fd..b2745c81dec8885 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -435,9 +435,9 @@ class LoopVectorizationPlanner {
   /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
   /// according to the best selected \p VF and  \p UF.
   ///
-  /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue
-  /// vectorization re-using plans for both the main and epilogue vector loops.
-  /// It should be removed once the re-use issue has been fixed.
+  /// TODO: \p VectorizingEpilogue indicates if the executed VPlan is for the
+  /// epilogue vector loop. It should be removed once epilogue vectorization no
+  /// longer re-uses plans for both the main and epilogue vector loops.
   /// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop
   /// to re-use expansion results generated during main plan execution.
   ///
@@ -447,7 +447,7 @@ class LoopVectorizationPlanner {
   DenseMap<const SCEV *, Value *>
   executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
               InnerLoopVectorizer &LB, DominatorTree *DT,
-              bool IsEpilogueVectorization,
+              bool VectorizingEpilogue,
               const DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr);
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index e1173ddd71af9c5..150fc4a42b4847f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -581,8 +581,8 @@ class InnerLoopVectorizer {
 
   /// Allow subclasses to override and print debug traces before/after vplan
   /// execution, when trace information is requested.
-  virtual void printDebugTracesAtStart(){};
-  virtual void printDebugTracesAtEnd(){};
+  virtual void printDebugTracesAtStart() {}
+  virtual void printDebugTracesAtEnd() {}
 
   /// The original loop.
   Loop *OrigLoop;
@@ -1310,7 +1310,7 @@ class LoopVectorizationCostModel {
       return false;
     case cl::BOU_FALSE:
       return true;
-    };
+    }
     llvm_unreachable("impossible case value");
   }
 
@@ -4492,6 +4492,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
       case VPDef::VPInstructionSC:
       case VPDef::VPCanonicalIVPHISC:
       case VPDef::VPVectorPointerSC:
+      case VPDef::VPReverseVectorPointerSC:
       case VPDef::VPExpandSCEVSC:
       case VPDef::VPEVLBasedIVPHISC:
       case VPDef::VPPredInstPHISC:
@@ -7561,81 +7562,76 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
   }
 }
 
-// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
-// create a merge phi node for it.
-static void createAndCollectMergePhiForReduction(
-    VPInstruction *RedResult,
-    VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
-    bool VectorizingEpilogue) {
-  if (!RedResult ||
-      RedResult->getOpcode() != VPInstruction::ComputeReductionResult)
+// If \p R is a ComputeReductionResult, fix the reduction's scalar resume phi
+// (bc.merge.rdx) by replacing the start value incoming from a block of the
+// main loop's resume phi with that phi's value. Epilogue vectorization only.
+static void fixReductionScalarResumeWhenVectorizingEpilog(
+    VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock) {
+  auto *EpiRedResult = dyn_cast<VPInstruction>(R);
+  if (!EpiRedResult ||
+      EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult)
     return;
 
-  auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
-  const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
-
-  Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane()));
-  auto *ResumePhi =
-      dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
-  if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
-                                 RdxDesc.getRecurrenceKind())) {
-    auto *Cmp = cast<ICmpInst>(PhiR->getStartValue()->getUnderlyingValue());
-    assert(Cmp->getPredicate() == CmpInst::ICMP_NE);
-    assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue());
-    ResumePhi = cast<PHINode>(Cmp->getOperand(0));
-  }
-  assert((!VectorizingEpilogue || ResumePhi) &&
-         "when vectorizing the epilogue loop, we need a resume phi from main "
-         "vector loop");
-
-  // TODO: bc.merge.rdx should not be created here, instead it should be
-  // modeled in VPlan.
-  BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader();
-  // Create a phi node that merges control-flow from the backedge-taken check
-  // block and the middle block.
-  auto *BCBlockPhi =
-      PHINode::Create(FinalValue->getType(), 2, "bc.merge.rdx",
-                      LoopScalarPreHeader->getTerminator()->getIterator());
-
-  // If we are fixing reductions in the epilogue loop then we should already
-  // have created a bc.merge.rdx Phi after the main vector body. Ensure that
-  // we carry over the incoming values correctly.
+  auto *EpiRedHeaderPhi =
+      cast<VPReductionPHIRecipe>(EpiRedResult->getOperand(0));
+  const RecurrenceDescriptor &RdxDesc =
+      EpiRedHeaderPhi->getRecurrenceDescriptor();
+  Value *MainResumeValue =
+      EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
+  if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
+          RdxDesc.getRecurrenceKind())) {
+    auto *Cmp = cast<ICmpInst>(MainResumeValue);
+    assert(Cmp->getPredicate() == CmpInst::ICMP_NE &&
+           "AnyOf expected to start with ICMP_NE");
+    assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue() &&
+           "AnyOf expected to start by comparing main resume value to original "
+           "start value");
+    MainResumeValue = Cmp->getOperand(0);
+  }
+  PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);
+
+  // The scalar resume phi (bc.merge.rdx) for the epilogue was generated with
+  // the original start value on every non-middle-block edge. Overwrite the
+  // edge shared with the main loop's resume phi so its value is carried over.
+  using namespace VPlanPatternMatch;
+  auto IsResumePhi = [](VPUser *U) {
+    return match(
+        U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(), m_VPValue()));
+  };
+  assert(count_if(EpiRedResult->users(), IsResumePhi) == 1 &&
+         "ComputeReductionResult must have a single ResumePhi user");
+  auto *EpiResumePhiVPI =
+      cast<VPInstruction>(*find_if(EpiRedResult->users(), IsResumePhi));
+  auto *EpiResumePhi = cast<PHINode>(State.get(EpiResumePhiVPI, true));
+  BasicBlock *LoopScalarPreHeader = EpiResumePhi->getParent();
+  bool Updated = false;
   for (auto *Incoming : predecessors(LoopScalarPreHeader)) {
-    if (Incoming == LoopMiddleBlock)
-      BCBlockPhi->addIncoming(FinalValue, Incoming);
-    else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
-      BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
-                              Incoming);
-    else
-      BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
+    if (is_contained(MainResumePhi->blocks(), Incoming)) {
+      assert(EpiResumePhi->getIncomingValueForBlock(Incoming) ==
+                 RdxDesc.getRecurrenceStartValue() &&
+             "Trying to reset unexpected value");
+      assert(!Updated && "Should update at most 1 incoming value");
+      EpiResumePhi->setIncomingValueForBlock(
+          Incoming, MainResumePhi->getIncomingValueForBlock(Incoming));
+      Updated = true;
+    }
   }
-
-  auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
-  // TODO: This fixup should instead be modeled in VPlan.
-  // Fix the scalar loop reduction variable with the incoming reduction sum
-  // from the vector body and from the backedge value.
-  int IncomingEdgeBlockIdx =
-      OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch());
-  assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
-  // Pick the other block.
-  int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
-  OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
-  Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
-  OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
+  assert(Updated && "Must update EpiResumePhi.");
+  (void)Updated;
 }
 
 DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
     ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
-    InnerLoopVectorizer &ILV, DominatorTree *DT, bool IsEpilogueVectorization,
+    InnerLoopVectorizer &ILV, DominatorTree *DT, bool VectorizingEpilogue,
     const DenseMap<const SCEV *, Value *> *ExpandedSCEVs) {
   assert(BestVPlan.hasVF(BestVF) &&
          "Trying to execute plan with unsupported VF");
   assert(BestVPlan.hasUF(BestUF) &&
          "Trying to execute plan with unsupported UF");
   assert(
-      (IsEpilogueVectorization || !ExpandedSCEVs) &&
+      ((VectorizingEpilogue && ExpandedSCEVs) ||
+       (!VectorizingEpilogue && !ExpandedSCEVs)) &&
       "expanded SCEVs to reuse can only be used during epilogue vectorization");
-  (void)IsEpilogueVectorization;
 
   // TODO: Move to VPlan transform stage once the transition to the VPlan-based
   // cost model is complete for better cost estimates.
@@ -7661,8 +7657,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   if (!ILV.getTripCount())
     ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
   else
-    assert(IsEpilogueVectorization && "should only re-use the existing trip "
-                                      "count during epilogue vectorization");
+    assert(VectorizingEpilogue && "should only re-use the existing trip "
+                                  "count during epilogue vectorization");
 
   // 1. Set up the skeleton for vectorization, including vector pre-header and
   // middle block. The vector loop is created during VPlan execution.
@@ -7712,11 +7708,11 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   // 2.5 Collect reduction resume values.
   auto *ExitVPBB =
       cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
-  for (VPRecipeBase &R : *ExitVPBB) {
-    createAndCollectMergePhiForReduction(
-        dyn_cast<VPInstruction>(&R), State, OrigLoop,
-        State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
-  }
+  if (VectorizingEpilogue)
+    for (VPRecipeBase &R : *ExitVPBB) {
+      fixReductionScalarResumeWhenVectorizingEpilog(
+          &R, State, State.CFG.VPBB2IRBB[ExitVPBB]);
+    }
 
   // 2.6. Maintain Loop Hints
   // Keep all loop hints from the original loop on the vector loop (we'll
@@ -8278,9 +8274,15 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
   if (Consecutive) {
     auto *GEP = dyn_cast<GetElementPtrInst>(
         Ptr->getUnderlyingValue()->stripPointerCasts());
-    auto *VectorPtr = new VPVectorPointerRecipe(
-        Ptr, getLoadStoreType(I), Reverse, GEP ? GEP->isInBounds() : false,
-        I->getDebugLoc());
+    VPSingleDefRecipe *VectorPtr;
+    if (Reverse)
+      VectorPtr = new VPReverseVectorPointerRecipe(
+          Ptr, &Plan.getVF(), getLoadStoreType(I),
+          GEP ? GEP->isInBounds() : false, I->getDebugLoc());
+    else
+      VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
+                                            GEP ? GEP->isInBounds() : false,
+                                            I->getDebugLoc());
     Builder.getInsertBlock()->appendRecipe(VectorPtr);
     Ptr = VectorPtr;
   }
@@ -9063,7 +9065,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
     if (!getDecisionAndClampRange(ApplyIG, Range))
       continue;
     InterleaveGroups.insert(IG);
-  };
+  }
 
   // ---------------------------------------------------------------------------
   // Construct recipes for the instructions in the loop
@@ -9511,6 +9513,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
         });
     FinalReductionResult->insertBefore(*MiddleVPBB, IP);
 
+    // Order is strict: if there are multiple successors, the first is the exit
+    // block, second is the scalar preheader.
+    VPBasicBlock *ScalarPHVPBB =
+        cast<VPBasicBlock>(MiddleVPBB->getSuccessors().back());
+    VPBuilder ScalarPHBuilder(ScalarPHVPBB);
+    auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
+        VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()},
+        {}, "bc.merge.rdx");
+    auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
+    Plan->addLiveOut(RedPhi, ResumePhiRecipe);
+
     // Adjust AnyOf reductions; replace the reduction phi for the selected value
     // with a boolean reduction phi node to check if the condition is true in
     // any iteration. The final value is selected by the final
@@ -10233,7 +10246,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
 
         std::unique_ptr<VPlan> BestMainPlan(BestPlan.duplicate());
         auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
-                                             *BestMainPlan, MainILV, DT, true);
+                                             *BestMainPlan, MainILV, DT, false);
         ++LoopsVectorized;
 
         // Second pass vectorizes the epilogue and adjusts the control flow
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2afd02dae3a8b8a..268546fe99e1383 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7947,8 +7947,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           Nodes.insert(E);
         SmallPtrSet<Value *, 8> Values(VL.begin(), VL.end());
         if (any_of(Nodes, [&](const TreeEntry *E) {
-              return all_of(E->Scalars,
-                            [&](Value *V) { return Values.contains(V); });
+              if (all_of(E->Scalars,
+                         [&](Value *V) { return Values.contains(V); }))
+                return true;
+              SmallPtrSet<Value *, 8> EValues(E->Scalars.begin(),
+                                              E->Scalars.end());
+              return (
+                  all_of(VL, [&](Value *V) { return EValues.contains(V); }));
             })) {
           LLVM_DEBUG(dbgs() << "SLP: Gathering due to full overlap.\n");
           if (TryToFindDuplicates(S))
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
index 0e2cd83c37b0cd0..1cc6356300e492b 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp
@@ -7,11 +7,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h"
+#include "llvm/SandboxIR/Instruction.h"
+#include "llvm/SandboxIR/Operator.h"
+#include "llvm/SandboxIR/Utils.h"
 #include "llvm/SandboxIR/Value.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h"
 
 namespace llvm::sandboxir {
 
+#define DEBUG_TYPE "SBVec:Legality"
+
 #ifndef NDEBUG
 void LegalityResult::dump() const {
   print(dbgs());
@@ -22,11 +28,68 @@ void LegalityResult::dump() const {
 std::optional<ResultReason>
 LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes(
     ArrayRef<Value *> Bndl) {
-  // TODO: Unimplemented.
+  auto *I0 = cast<Instruction>(Bndl[0]);
+  auto Opcode = I0->getOpcode();
+  // If they have different opcodes, then we cannot form a vector (for now).
+  if (any_of(drop_begin(Bndl), [Opcode](Value *V) {
+        return cast<Instruction>(V)->getOpcode() != Opcode;
+      }))
+    return ResultReason::DiffOpcodes;
+
+  // If not the same scalar type, Pack. This will accept scalars and vectors as
+  // long as the element type is the same.
+  Type *ElmTy0 = VecUtils::getElementType(Utils::getExpectedType(I0));
+  if (any_of(drop_begin(Bndl), [ElmTy0](Value *V) {
+        return VecUtils::getElementType(Utils::getExpectedType(V)) != ElmTy0;
+      }))
+    return ResultReason::DiffTypes;
+
+  // TODO: Allow vectorization of instrs with different flags as long as we
+  // change them to the least common one.
+  // For now pack if different FastMathFlags.
+  if (isa<FPMathOperator>(I0)) {
+    FastMathFlags FMF0 = cast<Instruction>(Bndl[0])->getFastMathFlags();
+    if (any_of(drop_begin(Bndl), [FMF0](auto *V) {
+          return cast<Instruction>(V)->getFastMathFlags() != FMF0;
+        }))
+      return ResultReason::DiffMathFlags;
+  }
+
+  // TODO: Allow vectorization by using common flags.
+  // For now Pack if they don't have the same wrap flags.
+  bool CanHaveWrapFlags =
+      isa<OverflowingBinaryOperator>(I0) || isa<TruncInst>(I0);
+  if (CanHaveWrapFlags) {
+    bool NUW0 = I0->hasNoUnsignedWrap();
+    bool NSW0 = I0->hasNoSignedWrap();
+    if (any_of(drop_begin(Bndl), [NUW0, NSW0](auto *V) {
+          return cast<Instruction>(V)->hasNoUnsignedWrap() != NUW0 ||
+                 cast<Instruction>(V)->hasNoSignedWrap() != NSW0;
+        })) {
+      return ResultReason::DiffWrapFlags;
+    }
+  }
+
+  // TODO: Missing checks
+
   return std::nullopt;
 }
 
-LegalityResult &LegalityAnalysis::canVectorize(ArrayRef<Value *> Bndl) {
+#ifndef NDEBUG
+static void dumpBndl(ArrayRef<Value *> Bndl) {
+  for (auto *V : Bndl)
+    dbgs() << *V << "\n";
+}
+#endif // NDEBUG
+
+const LegalityResult &LegalityAnalysis::canVectorize(ArrayRef<Value *> Bndl) {
+  // If Bndl contains values other than instructions, we need to Pack.
+  if (any_of(Bndl, [](auto *V) { return !isa<Instruction>(V); })) {
+    LLVM_DEBUG(dbgs() << "Not vectorizing: Not Instructions:\n";
+               dumpBndl(Bndl););
+    return createLegalityResult<Pack>(ResultReason::NotInstructions);
+  }
+
   if (auto ReasonOpt = notVectorizableBasedOnOpcodesAndTypes(Bndl))
     return createLegalityResult<Pack>(*ReasonOpt);
 
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
index f11420e47f3e1f9..66d631edfc4076f 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.cpp
@@ -40,7 +40,7 @@ static SmallVector<Value *, 4> getOperand(ArrayRef<Value *> Bndl,
 }
 
 void BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl) {
-  auto LegalityRes = Legality.canVectorize(Bndl);
+  const auto &LegalityRes = Legality.canVectorize(Bndl);
   switch (LegalityRes.getSubclassID()) {
   case LegalityResultID::Widen: {
     auto *I = cast<Instruction>(Bndl[0]);
@@ -59,7 +59,7 @@ void BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl) {
 
 void BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl) { vectorizeRec(Bndl); }
 
-bool BottomUpVec::runOnFunction(Function &F) {
+bool BottomUpVec::runOnFunction(Function &F, const Analyses &A) {
   Change = false;
   // TODO: Start from innermost BBs first
   for (auto &BB : F) {
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.cpp
index 5887d5e8bc2683c..8e3f5b77429c5a0 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Passes/RegionsFromMetadata.cpp
@@ -17,11 +17,11 @@ RegionsFromMetadata::RegionsFromMetadata(StringRef Pipeline)
     : FunctionPass("regions-from-metadata"),
       RPM("rpm", Pipeline, SandboxVectorizerPassBuilder::createRegionPass) {}
 
-bool RegionsFromMetadata::runOnFunction(Function &F) {
+bool RegionsFromMetadata::runOnFunction(Function &F, const Analyses &A) {
   SmallVector<std::unique_ptr<sandboxir::Region>> Regions =
       sandboxir::Region::createRegionsFromMD(F);
   for (auto &R : Regions) {
-    RPM.runOnRegion(*R);
+    RPM.runOnRegion(*R, A);
   }
   return false;
 }
diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
index c68f9482e337dd5..96d825ed852fb22 100644
--- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizer.cpp
@@ -51,6 +51,7 @@ SandboxVectorizerPass::~SandboxVectorizerPass() = default;
 PreservedAnalyses SandboxVectorizerPass::run(Function &F,
                                              FunctionAnalysisManager &AM) {
   TTI = &AM.getResult<TargetIRAnalysis>(F);
+  SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
 
   bool Changed = runImpl(F);
   if (!Changed)
@@ -82,5 +83,6 @@ bool SandboxVectorizerPass::runImpl(Function &LLVMF) {
   // Create SandboxIR for LLVMF and run BottomUpVec on it.
   sandboxir::Context Ctx(LLVMF.getContext());
   sandboxir::Function &F = *Ctx.createFunction(&LLVMF);
-  return FPM.runOnFunction(F);
+  sandboxir::Analyses A(*SE);
+  return FPM.runOnFunction(F, A);
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 44ffcb954a28421..0484543d2d0398c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -453,13 +453,13 @@ VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
 void VPIRBasicBlock::execute(VPTransformState *State) {
   assert(getHierarchicalSuccessors().size() <= 2 &&
          "VPIRBasicBlock can have at most two successors at the moment!");
-  State->Builder.SetInsertPoint(getIRBasicBlock()->getTerminator());
-  executeRecipes(State, getIRBasicBlock());
+  State->Builder.SetInsertPoint(IRBB->getTerminator());
+  executeRecipes(State, IRBB);
   if (getSingleSuccessor()) {
-    assert(isa<UnreachableInst>(getIRBasicBlock()->getTerminator()));
-    auto *Br = State->Builder.CreateBr(getIRBasicBlock());
+    assert(isa<UnreachableInst>(IRBB->getTerminator()));
+    auto *Br = State->Builder.CreateBr(IRBB);
     Br->setOperand(0, nullptr);
-    getIRBasicBlock()->getTerminator()->eraseFromParent();
+    IRBB->getTerminator()->eraseFromParent();
   }
 
   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f2e6729a2e26596..0e0c64f6df9cbae 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -905,6 +905,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     case VPRecipeBase::VPReplicateSC:
     case VPRecipeBase::VPScalarIVStepsSC:
     case VPRecipeBase::VPVectorPointerSC:
+    case VPRecipeBase::VPReverseVectorPointerSC:
     case VPRecipeBase::VPWidenCallSC:
     case VPRecipeBase::VPWidenCanonicalIVSC:
     case VPRecipeBase::VPWidenCastSC:
@@ -1110,6 +1111,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
            R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
            R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
            R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
+           R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
            R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
   }
 
@@ -1686,13 +1688,18 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
 
   VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
                          ArrayRef<VPValue *> CallArguments, Type *Ty,
-                         bool MayReadFromMemory, bool MayWriteToMemory,
-                         bool MayHaveSideEffects, DebugLoc DL = {})
+                         DebugLoc DL = {})
       : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments),
-        VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
-        MayReadFromMemory(MayReadFromMemory),
-        MayWriteToMemory(MayWriteToMemory),
-        MayHaveSideEffects(MayHaveSideEffects) {}
+        VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
+    LLVMContext &Ctx = Ty->getContext();
+    AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
+    MemoryEffects ME = Attrs.getMemoryEffects();
+    MayReadFromMemory = !ME.onlyWritesMemory(); // Reads unless write-only.
+    MayWriteToMemory = !ME.onlyReadsMemory(); // Writes unless read-only.
+    MayHaveSideEffects = MayWriteToMemory ||
+                         !Attrs.hasFnAttr(Attribute::NoUnwind) ||
+                         !Attrs.hasFnAttr(Attribute::WillReturn);
+  }
 
   ~VPWidenIntrinsicRecipe() override = default;
 
@@ -1910,20 +1917,64 @@ class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
 #endif
 };
 
-/// A recipe to compute the pointers for widened memory accesses of IndexTy for
-/// all parts. If IsReverse is true, compute pointers for accessing the input in
-/// reverse order per part.
+/// A recipe to compute the pointers for widened memory accesses of IndexedTy
+/// in reverse order.
+class VPReverseVectorPointerRecipe : public VPRecipeWithIRFlags,
+                                     public VPUnrollPartAccessor<2> {
+  Type *IndexedTy;
+
+public:
+  VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy,
+                               bool IsInBounds, DebugLoc DL)
+      : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
+                            ArrayRef<VPValue *>({Ptr, VF}),
+                            GEPFlagsTy(IsInBounds), DL),
+        IndexedTy(IndexedTy) {}
+
+  VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
+
+  VPValue *getVFValue() { return getOperand(1); }
+  const VPValue *getVFValue() const { return getOperand(1); }
+
+  void execute(VPTransformState &State) override;
+
+  bool onlyFirstLaneUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    return true;
+  }
+
+  /// Returns true if the recipe only uses the first part of operand \p Op.
+  bool onlyFirstPartUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    assert(getNumOperands() <= 2 && "must have at most two operands");
+    return true;
+  }
+
+  VPReverseVectorPointerRecipe *clone() override {
+    return new VPReverseVectorPointerRecipe(
+        getOperand(0), getVFValue(), IndexedTy, isInBounds(), getDebugLoc());
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
+/// A recipe to compute the pointers for widened memory accesses of IndexedTy.
 class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
                               public VPUnrollPartAccessor<1> {
   Type *IndexedTy;
-  bool IsReverse;
 
 public:
-  VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
-                        bool IsInBounds, DebugLoc DL)
+  VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsInBounds,
+                        DebugLoc DL)
       : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
                             GEPFlagsTy(IsInBounds), DL),
-        IndexedTy(IndexedTy), IsReverse(IsReverse) {}
+        IndexedTy(IndexedTy) {}
 
   VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
 
@@ -1944,8 +1995,8 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
   }
 
   VPVectorPointerRecipe *clone() override {
-    return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
-                                     isInBounds(), getDebugLoc());
+    return new VPVectorPointerRecipe(getOperand(0), IndexedTy, isInBounds(),
+                                     getDebugLoc());
   }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2004,6 +2055,10 @@ class VPHeaderPHIRecipe : public VPSingleDefRecipe {
   /// Generate the phi nodes.
   void execute(VPTransformState &State) override = 0;
 
+  /// Return the cost of this header phi recipe.
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void print(raw_ostream &O, const Twine &Indent,
@@ -2249,6 +2304,10 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
 
   void execute(VPTransformState &State) override;
 
+  /// Return the cost of this first-order recurrence phi recipe.
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override;
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   /// Print the recipe.
   void print(raw_ostream &O, const Twine &Indent,
@@ -3088,6 +3147,13 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
   /// canonical, i.e.  has the same start and step (of 1) as the canonical IV.
   bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
                    VPValue *Step) const;
+
+  /// Return the cost of this VPCanonicalIVPHIRecipe.
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override {
+    // For now, match the behavior of the legacy cost model.
+    return 0;
+  }
 };
 
 /// A recipe for generating the active lane mask for the vector loop that is
@@ -3150,6 +3216,13 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
   /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
   void execute(VPTransformState &State) override;
 
+  /// Return the cost of this VPEVLBasedIVPHIRecipe.
+  InstructionCost computeCost(ElementCount VF,
+                              VPCostContext &Ctx) const override {
+    // For now, match the behavior of the legacy cost model.
+    return 0;
+  }
+
   /// Returns true if the recipe only uses the first lane of operand \p Op.
   bool onlyFirstLaneUsed(const VPValue *Op) const override {
     assert(is_contained(operands(), Op) &&
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index 3eb5f3f40f842ad..8b8ab6be99b0d57 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -263,9 +263,10 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
               [](const auto *R) { return R->getScalarType(); })
           .Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
                 VPScalarIVStepsRecipe, VPWidenGEPRecipe, VPVectorPointerRecipe,
-                VPWidenCanonicalIVRecipe>([this](const VPRecipeBase *R) {
-            return inferScalarType(R->getOperand(0));
-          })
+                VPReverseVectorPointerRecipe, VPWidenCanonicalIVRecipe>(
+              [this](const VPRecipeBase *R) {
+                return inferScalarType(R->getOperand(0));
+              })
           .Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
                 VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
                 VPWidenSelectRecipe>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 0eb4f7c7c88cee7..de7023167df8990 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -162,6 +162,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
   case VPDerivedIVSC:
   case VPPredInstPHISC:
   case VPScalarCastSC:
+  case VPReverseVectorPointerSC:
     return false;
   case VPInstructionSC:
     return mayWriteToMemory();
@@ -1524,6 +1525,11 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
 
 InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
                                                VPCostContext &Ctx) const {
+  // TODO: In some cases, VPWidenCastRecipes are created but not considered in
+  // the legacy cost model, including truncates/extends when evaluating a
+  // reduction in a smaller type.
+  if (!getUnderlyingValue())
+    return 0;
   // Computes the CastContextHint from a recipes that may access memory.
   auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
     if (VF.isScalar())
@@ -1583,6 +1589,11 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
+                                               VPCostContext &Ctx) const {
+  return Ctx.TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
+}
+
 /// This function adds
 /// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
 /// to each vector element of Val. The sequence starts at StartIndex.
@@ -1966,38 +1977,63 @@ void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-void VPVectorPointerRecipe ::execute(VPTransformState &State) {
-  auto &Builder = State.Builder;
-  State.setDebugLocFrom(getDebugLoc());
-  unsigned CurrentPart = getUnrollPart(*this);
+static Type *getGEPIndexTy(bool IsScalable, bool IsReverse,
+                           unsigned CurrentPart, IRBuilderBase &Builder) {
   // Use i32 for the gep index type when the value is constant,
   // or query DataLayout for a more suitable index type otherwise.
   const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
-  Type *IndexTy = State.VF.isScalable() && (IsReverse || CurrentPart > 0)
-                      ? DL.getIndexType(Builder.getPtrTy(0))
-                      : Builder.getInt32Ty();
+  return IsScalable && (IsReverse || CurrentPart > 0)
+             ? DL.getIndexType(Builder.getPtrTy(0))
+             : Builder.getInt32Ty();
+}
+
+void VPReverseVectorPointerRecipe::execute(VPTransformState &State) {
+  auto &Builder = State.Builder;
+  State.setDebugLocFrom(getDebugLoc());
+  unsigned CurrentPart = getUnrollPart(*this);
+  Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
+                                CurrentPart, Builder);
+
+  // The wide store needs to start at the last vector element.
+  Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
+  if (IndexTy != RunTimeVF->getType())
+    RunTimeVF = Builder.CreateZExtOrTrunc(RunTimeVF, IndexTy);
+  // NumElt = -CurrentPart * RunTimeVF
+  Value *NumElt = Builder.CreateMul(
+      ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
+  // LastLane = 1 - RunTimeVF
+  Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
   Value *Ptr = State.get(getOperand(0), VPLane(0));
   bool InBounds = isInBounds();
+  Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
+  ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
 
-  Value *ResultPtr = nullptr;
-  if (IsReverse) {
-    // If the address is consecutive but reversed, then the
-    // wide store needs to start at the last vector element.
-    // RunTimeVF =  VScale * VF.getKnownMinValue()
-    // For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
-    Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
-    // NumElt = -CurrentPart * RunTimeVF
-    Value *NumElt = Builder.CreateMul(
-        ConstantInt::get(IndexTy, -(int64_t)CurrentPart), RunTimeVF);
-    // LastLane = 1 - RunTimeVF
-    Value *LastLane =
-        Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
-    ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, NumElt, "", InBounds);
-    ResultPtr = Builder.CreateGEP(IndexedTy, ResultPtr, LastLane, "", InBounds);
-  } else {
-    Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
-    ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
-  }
+  State.set(this, ResultPtr, /*IsScalar*/ true);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPReverseVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
+                                         VPSlotTracker &SlotTracker) const {
+  O << Indent;
+  printAsOperand(O, SlotTracker);
+  O << " = reverse-vector-pointer ";
+  if (isInBounds())
+    O << "inbounds ";
+  printOperands(O, SlotTracker);
+}
+#endif
+
+void VPVectorPointerRecipe::execute(VPTransformState &State) {
+  auto &Builder = State.Builder;
+  State.setDebugLocFrom(getDebugLoc());
+  unsigned CurrentPart = getUnrollPart(*this);
+  Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
+                                CurrentPart, Builder);
+  Value *Ptr = State.get(getOperand(0), VPLane(0));
+  bool InBounds = isInBounds();
+
+  Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
+  Value *ResultPtr = Builder.CreateGEP(IndexedTy, Ptr, Increment, "", InBounds);
 
   State.set(this, ResultPtr, /*IsScalar*/ true);
 }
@@ -2008,8 +2044,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
   O << Indent;
   printAsOperand(O, SlotTracker);
   O << " = vector-pointer ";
-  if (IsReverse)
-    O << "(reverse) ";
 
   printOperands(O, SlotTracker);
 }
@@ -3305,6 +3339,23 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
   State.set(this, Phi);
 }
 
+InstructionCost
+VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
+                                             VPCostContext &Ctx) const {
+  if (VF.isScalable() && VF.getKnownMinValue() == 1)
+    return InstructionCost::getInvalid();
+
+  SmallVector<int> Mask(VF.getKnownMinValue());
+  std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
+  Type *VectorTy =
+      ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
+
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+  return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
+                                cast<VectorType>(VectorTy), Mask, CostKind,
+                                VF.getKnownMinValue() - 1);
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                             VPSlotTracker &SlotTracker) const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 03c4110761ac6a9..355781f955052e9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1489,7 +1489,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
                 Ops.push_back(&EVL);
                 return new VPWidenIntrinsicRecipe(Intrinsic::vp_select, Ops,
                                                   TypeInfo.inferScalarType(Sel),
-                                                  false, false, false);
+                                                  Sel->getDebugLoc());
               })
 
               .Default([&](VPRecipeBase *R) { return nullptr; });
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
index ca78f32506ef715..1e32865e8ee576d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
@@ -316,12 +316,12 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
     // Add operand indicating the part to generate code for, to recipes still
     // requiring it.
     if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
-            VPVectorPointerRecipe>(Copy) ||
+            VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(Copy) ||
         match(Copy, m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>(
                         m_VPValue())))
       Copy->addOperand(getConstantVPV(Part));
 
-    if (isa<VPVectorPointerRecipe>(R))
+    if (isa<VPVectorPointerRecipe, VPReverseVectorPointerRecipe>(R))
       Copy->setOperand(0, R.getOperand(0));
   }
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 1900182f76e0715..89b3ed72b8eb65f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -346,6 +346,7 @@ class VPDef {
     VPScalarCastSC,
     VPScalarIVStepsSC,
     VPVectorPointerSC,
+    VPReverseVectorPointerSC,
     VPWidenCallSC,
     VPWidenCanonicalIVSC,
     VPWidenCastSC,
diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll
index b4740f223eca3a7..c6826760a45bee6 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fround.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll
@@ -1,30 +1,50 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zvfh | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zvfhmin | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zvfh,+zvfbfmin | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v,+f,+d,+zvfhmin,+zvfbfmin | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
 
 define void @floor() {
 ; CHECK-LABEL: 'floor'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.floor.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.floor.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %6 = call <vscale x 1 x float> @llvm.floor.nxv1f32(<vscale x 1 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %7 = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %8 = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %9 = call <vscale x 8 x float> @llvm.floor.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %10 = call <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.floor.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x double> @llvm.floor.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %19 = call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call bfloat @llvm.floor.bf16(bfloat undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.floor.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.floor.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.floor.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.floor.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.floor.nxv1bf16(<vscale x 1 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.floor.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.floor.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.floor.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.floor.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call float @llvm.floor.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x float> @llvm.floor.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 1 x float> @llvm.floor.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 2 x float> @llvm.floor.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %19 = call <vscale x 8 x float> @llvm.floor.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %20 = call <vscale x 16 x float> @llvm.floor.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %21 = call double @llvm.floor.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %22 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %23 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %24 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %25 = call <16 x double> @llvm.floor.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %26 = call <vscale x 1 x double> @llvm.floor.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %27 = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %28 = call <vscale x 4 x double> @llvm.floor.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %29 = call <vscale x 8 x double> @llvm.floor.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call bfloat @llvm.floor.bf16(bfloat undef)
+  call <2 x bfloat> @llvm.floor.v2bf16(<2 x bfloat> undef)
+  call <4 x bfloat> @llvm.floor.v4bf16(<4 x bfloat> undef)
+  call <8 x bfloat> @llvm.floor.v8bf16(<8 x bfloat> undef)
+  call <16 x bfloat> @llvm.floor.v16bf16(<16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.floor.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.floor.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.floor.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.floor.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.floor.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.floor.f32(float undef)
   call <2 x float> @llvm.floor.v2f32(<2 x float> undef)
   call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
@@ -89,27 +109,47 @@ define void @floor_fp16() {
 
 define void @ceil() {
 ; CHECK-LABEL: 'ceil'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.ceil.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.ceil.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %6 = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %7 = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %8 = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %9 = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %10 = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.ceil.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x double> @llvm.ceil.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %19 = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call bfloat @llvm.ceil.bf16(bfloat undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.ceil.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.ceil.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.ceil.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.ceil.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.ceil.nxv1bf16(<vscale x 1 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.ceil.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.ceil.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.ceil.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.ceil.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call float @llvm.ceil.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x float> @llvm.ceil.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 1 x float> @llvm.ceil.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %19 = call <vscale x 8 x float> @llvm.ceil.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %20 = call <vscale x 16 x float> @llvm.ceil.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %21 = call double @llvm.ceil.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %22 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %23 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %24 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %25 = call <16 x double> @llvm.ceil.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %26 = call <vscale x 1 x double> @llvm.ceil.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %27 = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %28 = call <vscale x 4 x double> @llvm.ceil.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %29 = call <vscale x 8 x double> @llvm.ceil.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call bfloat @llvm.ceil.bf16(bfloat undef)
+  call <2 x bfloat> @llvm.ceil.v2bf16(<2 x bfloat> undef)
+  call <4 x bfloat> @llvm.ceil.v4bf16(<4 x bfloat> undef)
+  call <8 x bfloat> @llvm.ceil.v8bf16(<8 x bfloat> undef)
+  call <16 x bfloat> @llvm.ceil.v16bf16(<16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.ceil.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.ceil.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.ceil.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.ceil.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.ceil.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.ceil.f32(float undef)
   call <2 x float> @llvm.ceil.v2f32(<2 x float> undef)
   call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
@@ -174,27 +214,47 @@ define void @ceil_fp16() {
 
 define void @trunc() {
 ; CHECK-LABEL: 'trunc'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.trunc.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <2 x float> @llvm.trunc.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 2 x float> @llvm.trunc.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 8 x float> @llvm.trunc.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <vscale x 16 x float> @llvm.trunc.nxv16f32(<vscale x 16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.trunc.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x double> @llvm.trunc.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <vscale x 8 x double> @llvm.trunc.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call bfloat @llvm.trunc.bf16(bfloat undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.trunc.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.trunc.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.trunc.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.trunc.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.trunc.nxv1bf16(<vscale x 1 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.trunc.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.trunc.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.trunc.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.trunc.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call float @llvm.trunc.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x float> @llvm.trunc.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 1 x float> @llvm.trunc.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 2 x float> @llvm.trunc.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <vscale x 8 x float> @llvm.trunc.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <vscale x 16 x float> @llvm.trunc.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %21 = call double @llvm.trunc.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <16 x double> @llvm.trunc.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 1 x double> @llvm.trunc.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %27 = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %28 = call <vscale x 4 x double> @llvm.trunc.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %29 = call <vscale x 8 x double> @llvm.trunc.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call bfloat @llvm.trunc.bf16(bfloat undef)
+  call <2 x bfloat> @llvm.trunc.v2bf16(<2 x bfloat> undef)
+  call <4 x bfloat> @llvm.trunc.v4bf16(<4 x bfloat> undef)
+  call <8 x bfloat> @llvm.trunc.v8bf16(<8 x bfloat> undef)
+  call <16 x bfloat> @llvm.trunc.v16bf16(<16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.trunc.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.trunc.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.trunc.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.trunc.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.trunc.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.trunc.f32(float undef)
   call <2 x float> @llvm.trunc.v2f32(<2 x float> undef)
   call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
@@ -259,27 +319,47 @@ define void @trunc_fp16() {
 
 define void @rint() {
 ; CHECK-LABEL: 'rint'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.rint.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <2 x float> @llvm.rint.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 1 x float> @llvm.rint.nxv1f32(<vscale x 1 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 2 x float> @llvm.rint.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 8 x float> @llvm.rint.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <vscale x 16 x float> @llvm.rint.nxv16f32(<vscale x 16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.rint.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x double> @llvm.rint.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 4 x double> @llvm.rint.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <vscale x 8 x double> @llvm.rint.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call bfloat @llvm.rint.bf16(bfloat undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.rint.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.rint.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.rint.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.rint.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.rint.nxv1bf16(<vscale x 1 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.rint.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.rint.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.rint.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.rint.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call float @llvm.rint.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <2 x float> @llvm.rint.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 1 x float> @llvm.rint.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 2 x float> @llvm.rint.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <vscale x 8 x float> @llvm.rint.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <vscale x 16 x float> @llvm.rint.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %21 = call double @llvm.rint.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <16 x double> @llvm.rint.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 1 x double> @llvm.rint.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %27 = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %28 = call <vscale x 4 x double> @llvm.rint.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %29 = call <vscale x 8 x double> @llvm.rint.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call bfloat @llvm.rint.bf16(bfloat undef)
+  call <2 x bfloat> @llvm.rint.v2bf16(<2 x bfloat> undef)
+  call <4 x bfloat> @llvm.rint.v4bf16(<4 x bfloat> undef)
+  call <8 x bfloat> @llvm.rint.v8bf16(<8 x bfloat> undef)
+  call <16 x bfloat> @llvm.rint.v16bf16(<16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.rint.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.rint.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.rint.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.rint.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.rint.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.rint.f32(float undef)
   call <2 x float> @llvm.rint.v2f32(<2 x float> undef)
   call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
@@ -344,27 +424,47 @@ define void @rint_fp16() {
 
 define void @lrint() {
 ; CHECK-LABEL: 'lrint'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f32(<vscale x 1 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f32(<vscale x 16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.lrint.i64.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.bf16(bfloat undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1bf16(<vscale x 1 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.lrint.i64.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = call i64 @llvm.lrint.i64.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i64> @llvm.lrint.v16i64.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %29 = call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call i64 @llvm.lrint.i64.bf16(bfloat undef)
+  call <2 x i64> @llvm.lrint.v2i64.v2bf16(<2 x bfloat> undef)
+  call <4 x i64> @llvm.lrint.v4i64.v4bf16(<4 x bfloat> undef)
+  call <8 x i64> @llvm.lrint.v8i64.v8bf16(<8 x bfloat> undef)
+  call <16 x i64> @llvm.lrint.v16i64.v16bf16(<16 x bfloat> undef)
+  call <vscale x 1 x i64> @llvm.lrint.nxv1i64.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x i64> @llvm.lrint.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x i64> @llvm.lrint.nxv4i64.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x i64> @llvm.lrint.nxv8i64.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x i64> @llvm.lrint.nxv16i64.nxv16bf16(<vscale x 16 x bfloat> undef)
   call i64 @llvm.lrint.i64.f32(float undef)
   call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef)
   call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef)
@@ -416,27 +516,47 @@ define void @lrint_fp16() {
 
 define void @llrint() {
 ; CHECK-LABEL: 'llrint'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f32(<vscale x 1 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f32(<vscale x 16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.llrint.i64.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.bf16(bfloat undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1bf16(<vscale x 1 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %10 = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.llrint.i64.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %16 = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %17 = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %20 = call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %21 = call i64 @llvm.llrint.i64.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i64> @llvm.llrint.v16i64.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %26 = call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %27 = call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %29 = call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call i64 @llvm.llrint.i64.bf16(bfloat undef)
+  call <2 x i64> @llvm.llrint.v2i64.v2bf16(<2 x bfloat> undef)
+  call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> undef)
+  call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat> undef)
+  call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> undef)
+  call <vscale x 1 x i64> @llvm.llrint.nxv1i64.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x i64> @llvm.llrint.nxv2i64.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x i64> @llvm.llrint.nxv4i64.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x i64> @llvm.llrint.nxv8i64.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x i64> @llvm.llrint.nxv16i64.nxv16bf16(<vscale x 16 x bfloat> undef)
   call i64 @llvm.llrint.i64.f32(float undef)
   call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef)
   call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef)
@@ -488,27 +608,47 @@ define void @llrint_fp16() {
 
 define void @nearbyint() {
 ; CHECK-LABEL: 'nearbyint'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %1 = call float @llvm.nearbyint.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %6 = call <vscale x 1 x float> @llvm.nearbyint.nxv1f32(<vscale x 1 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %7 = call <vscale x 2 x float> @llvm.nearbyint.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %8 = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %9 = call <vscale x 8 x float> @llvm.nearbyint.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %10 = call <vscale x 16 x float> @llvm.nearbyint.nxv16f32(<vscale x 16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %11 = call double @llvm.nearbyint.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 1 x double> @llvm.nearbyint.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %19 = call <vscale x 8 x double> @llvm.nearbyint.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %1 = call bfloat @llvm.nearbyint.bf16(bfloat undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.nearbyint.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.nearbyint.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.nearbyint.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.nearbyint.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.nearbyint.nxv1bf16(<vscale x 1 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.nearbyint.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.nearbyint.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.nearbyint.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.nearbyint.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %11 = call float @llvm.nearbyint.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 1 x float> @llvm.nearbyint.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 2 x float> @llvm.nearbyint.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %19 = call <vscale x 8 x float> @llvm.nearbyint.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %20 = call <vscale x 16 x float> @llvm.nearbyint.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %21 = call double @llvm.nearbyint.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %22 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %23 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %24 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %25 = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %26 = call <vscale x 1 x double> @llvm.nearbyint.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %27 = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %28 = call <vscale x 4 x double> @llvm.nearbyint.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %29 = call <vscale x 8 x double> @llvm.nearbyint.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call bfloat @llvm.nearbyint.bf16(bfloat undef)
+  call <2 x bfloat> @llvm.nearbyint.v2bf16(<2 x bfloat> undef)
+  call <4 x bfloat> @llvm.nearbyint.v4bf16(<4 x bfloat> undef)
+  call <8 x bfloat> @llvm.nearbyint.v8bf16(<8 x bfloat> undef)
+  call <16 x bfloat> @llvm.nearbyint.v16bf16(<16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.nearbyint.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.nearbyint.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.nearbyint.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.nearbyint.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.nearbyint.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.nearbyint.f32(float undef)
   call <2 x float> @llvm.nearbyint.v2f32(<2 x float> undef)
   call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
@@ -573,27 +713,47 @@ define void @nearbyint_fp16() {
 
 define void @round() {
 ; CHECK-LABEL: 'round'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.round.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.round.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.round.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.round.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.round.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %6 = call <vscale x 1 x float> @llvm.round.nxv1f32(<vscale x 1 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %7 = call <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %8 = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %9 = call <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %10 = call <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.round.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.round.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.round.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.round.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x double> @llvm.round.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %19 = call <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call bfloat @llvm.round.bf16(bfloat undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.round.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.round.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.round.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.round.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.round.nxv1bf16(<vscale x 1 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.round.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.round.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.round.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.round.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call float @llvm.round.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x float> @llvm.round.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x float> @llvm.round.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x float> @llvm.round.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x float> @llvm.round.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 1 x float> @llvm.round.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 2 x float> @llvm.round.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %19 = call <vscale x 8 x float> @llvm.round.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %20 = call <vscale x 16 x float> @llvm.round.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %21 = call double @llvm.round.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %22 = call <2 x double> @llvm.round.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %23 = call <4 x double> @llvm.round.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %24 = call <8 x double> @llvm.round.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %25 = call <16 x double> @llvm.round.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %26 = call <vscale x 1 x double> @llvm.round.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %27 = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %28 = call <vscale x 4 x double> @llvm.round.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %29 = call <vscale x 8 x double> @llvm.round.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call bfloat @llvm.round.bf16(bfloat undef)
+  call <2 x bfloat> @llvm.round.v2bf16(<2 x bfloat> undef)
+  call <4 x bfloat> @llvm.round.v4bf16(<4 x bfloat> undef)
+  call <8 x bfloat> @llvm.round.v8bf16(<8 x bfloat> undef)
+  call <16 x bfloat> @llvm.round.v16bf16(<16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.round.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.round.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.round.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.round.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.round.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.round.f32(float undef)
   call <2 x float> @llvm.round.v2f32(<2 x float> undef)
   call <4 x float> @llvm.round.v4f32(<4 x float> undef)
@@ -658,27 +818,47 @@ define void @round_fp16() {
 
 define void @roundeven() {
 ; CHECK-LABEL: 'roundeven'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call float @llvm.roundeven.f32(float undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %2 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %3 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %4 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %5 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %6 = call <vscale x 1 x float> @llvm.roundeven.nxv1f32(<vscale x 1 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %7 = call <vscale x 2 x float> @llvm.roundeven.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %8 = call <vscale x 4 x float> @llvm.roundeven.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %9 = call <vscale x 8 x float> @llvm.roundeven.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %10 = call <vscale x 16 x float> @llvm.roundeven.nxv16f32(<vscale x 16 x float> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call double @llvm.roundeven.f64(double undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x double> @llvm.roundeven.v16f64(<16 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 1 x double> @llvm.roundeven.nxv1f64(<vscale x 1 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 2 x double> @llvm.roundeven.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 4 x double> @llvm.roundeven.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %19 = call <vscale x 8 x double> @llvm.roundeven.nxv8f64(<vscale x 8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = call bfloat @llvm.roundeven.bf16(bfloat undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x bfloat> @llvm.roundeven.v2bf16(<2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x bfloat> @llvm.roundeven.v4bf16(<4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x bfloat> @llvm.roundeven.v8bf16(<8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x bfloat> @llvm.roundeven.v16bf16(<16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 1 x bfloat> @llvm.roundeven.nxv1bf16(<vscale x 1 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 2 x bfloat> @llvm.roundeven.nxv2bf16(<vscale x 2 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x bfloat> @llvm.roundeven.nxv4bf16(<vscale x 4 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x bfloat> @llvm.roundeven.nxv8bf16(<vscale x 8 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x bfloat> @llvm.roundeven.nxv16bf16(<vscale x 16 x bfloat> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %11 = call float @llvm.roundeven.f32(float undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %12 = call <2 x float> @llvm.roundeven.v2f32(<2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %13 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %14 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %15 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %16 = call <vscale x 1 x float> @llvm.roundeven.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %17 = call <vscale x 2 x float> @llvm.roundeven.nxv2f32(<vscale x 2 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %18 = call <vscale x 4 x float> @llvm.roundeven.nxv4f32(<vscale x 4 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %19 = call <vscale x 8 x float> @llvm.roundeven.nxv8f32(<vscale x 8 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %20 = call <vscale x 16 x float> @llvm.roundeven.nxv16f32(<vscale x 16 x float> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %21 = call double @llvm.roundeven.f64(double undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %22 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %23 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %24 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %25 = call <16 x double> @llvm.roundeven.v16f64(<16 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %26 = call <vscale x 1 x double> @llvm.roundeven.nxv1f64(<vscale x 1 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %27 = call <vscale x 2 x double> @llvm.roundeven.nxv2f64(<vscale x 2 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %28 = call <vscale x 4 x double> @llvm.roundeven.nxv4f64(<vscale x 4 x double> undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %29 = call <vscale x 8 x double> @llvm.roundeven.nxv8f64(<vscale x 8 x double> undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call bfloat @llvm.roundeven.bf16(bfloat undef)
+  call <2 x bfloat> @llvm.roundeven.v2bf16(<2 x bfloat> undef)
+  call <4 x bfloat> @llvm.roundeven.v4bf16(<4 x bfloat> undef)
+  call <8 x bfloat> @llvm.roundeven.v8bf16(<8 x bfloat> undef)
+  call <16 x bfloat> @llvm.roundeven.v16bf16(<16 x bfloat> undef)
+  call <vscale x 1 x bfloat> @llvm.roundeven.nxv1bf16(<vscale x 1 x bfloat> undef)
+  call <vscale x 2 x bfloat> @llvm.roundeven.nxv2bf16(<vscale x 2 x bfloat> undef)
+  call <vscale x 4 x bfloat> @llvm.roundeven.nxv4bf16(<vscale x 4 x bfloat> undef)
+  call <vscale x 8 x bfloat> @llvm.roundeven.nxv8bf16(<vscale x 8 x bfloat> undef)
+  call <vscale x 16 x bfloat> @llvm.roundeven.nxv16bf16(<vscale x 16 x bfloat> undef)
   call float @llvm.roundeven.f32(float undef)
   call <2 x float> @llvm.roundeven.v2f32(<2 x float> undef)
   call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
@@ -743,25 +923,43 @@ define void @roundeven_fp16() {
 
 define void @vp_ceil() {
 ; CHECK-LABEL: 'vp_ceil'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.ceil.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.ceil.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.ceil.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.ceil.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call <2 x bfloat> @llvm.vp.ceil.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  call <4 x bfloat> @llvm.vp.ceil.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  call <8 x bfloat> @llvm.vp.ceil.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  call <16 x bfloat> @llvm.vp.ceil.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.ceil.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.ceil.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.ceil.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.ceil.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.ceil.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
@@ -821,25 +1019,43 @@ define void @vp_ceil_f16() {
 
 define void @vp_floor() {
 ; CHECK-LABEL: 'vp_floor'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.floor.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.floor.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.floor.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.floor.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x bfloat> @llvm.vp.floor.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.vp.floor.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.floor.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.floor.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.floor.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call <2 x bfloat> @llvm.vp.floor.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  call <4 x bfloat> @llvm.vp.floor.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  call <8 x bfloat> @llvm.vp.floor.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  call <16 x bfloat> @llvm.vp.floor.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.floor.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.floor.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.floor.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.floor.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.floor.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
@@ -899,25 +1115,43 @@ define void @vp_floor_f16() {
 
 define void @vp_round() {
 ; CHECK-LABEL: 'vp_round'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.round.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.round.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.round.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.round.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.round.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.round.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.round.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.round.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.round.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.round.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.round.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.round.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.round.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.round.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call <2 x bfloat> @llvm.vp.round.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  call <4 x bfloat> @llvm.vp.round.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  call <8 x bfloat> @llvm.vp.round.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  call <16 x bfloat> @llvm.vp.round.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.round.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.round.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.round.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.round.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.round.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
@@ -977,25 +1211,43 @@ define void @vp_round_f16() {
 
 define void @vp_roundeven() {
 ; CHECK-LABEL: 'vp_roundeven'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.roundeven.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.roundeven.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.roundeven.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.roundeven.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x bfloat> @llvm.vp.roundeven.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.vp.roundeven.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.roundeven.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.roundeven.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.roundeven.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call <2 x bfloat> @llvm.vp.roundeven.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  call <4 x bfloat> @llvm.vp.roundeven.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  call <8 x bfloat> @llvm.vp.roundeven.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  call <16 x bfloat> @llvm.vp.roundeven.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.roundeven.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.roundeven.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.roundeven.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.roundeven.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.roundeven.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
@@ -1055,25 +1307,43 @@ define void @vp_roundeven_f16() {
 
 define void @vp_roundtozero() {
 ; CHECK-LABEL: 'vp_roundtozero'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.roundtozero.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.roundtozero.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.roundtozero.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.roundtozero.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.roundtozero.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.roundtozero.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.roundtozero.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.roundtozero.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.roundtozero.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x bfloat> @llvm.vp.roundtozero.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.vp.roundtozero.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.roundtozero.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.roundtozero.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.roundtozero.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.roundtozero.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.roundtozero.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.roundtozero.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.roundtozero.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.roundtozero.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call <2 x bfloat> @llvm.vp.roundtozero.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  call <4 x bfloat> @llvm.vp.roundtozero.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  call <8 x bfloat> @llvm.vp.roundtozero.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  call <16 x bfloat> @llvm.vp.roundtozero.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.roundtozero.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.roundtozero.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.roundtozero.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.roundtozero.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.roundtozero.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
@@ -1133,25 +1403,43 @@ define void @vp_roundtozero_f16() {
 
 define void @vp_rint() {
 ; CHECK-LABEL: 'vp_rint'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %1 = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %2 = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %3 = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %4 = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %10 = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %11 = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %13 = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.rint.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.rint.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.rint.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.rint.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %10 = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %11 = call <4 x float> @llvm.vp.rint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %12 = call <8 x float> @llvm.vp.rint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %13 = call <16 x float> @llvm.vp.rint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.rint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.rint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.rint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.rint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %19 = call <2 x double> @llvm.vp.rint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %20 = call <4 x double> @llvm.vp.rint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %21 = call <8 x double> @llvm.vp.rint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %22 = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.rint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.rint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.rint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call <2 x bfloat> @llvm.vp.rint.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  call <4 x bfloat> @llvm.vp.rint.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  call <8 x bfloat> @llvm.vp.rint.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  call <16 x bfloat> @llvm.vp.rint.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.rint.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.rint.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.rint.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.rint.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.rint.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.rint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.rint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
@@ -1211,25 +1499,43 @@ define void @vp_rint_f16() {
 
 define void @vp_nearbyint() {
 ; CHECK-LABEL: 'vp_nearbyint'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.nearbyint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.nearbyint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x bfloat> @llvm.vp.nearbyint.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x bfloat> @llvm.vp.nearbyint.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x bfloat> @llvm.vp.nearbyint.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x bfloat> @llvm.vp.nearbyint.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = call <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = call <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x float> @llvm.vp.nearbyint.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x float> @llvm.vp.nearbyint.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x float> @llvm.vp.nearbyint.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x float> @llvm.vp.nearbyint.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %18 = call <vscale x 16 x float> @llvm.vp.nearbyint.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %19 = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %20 = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %21 = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %22 = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %23 = call <vscale x 1 x double> @llvm.vp.nearbyint.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %24 = call <vscale x 2 x double> @llvm.vp.nearbyint.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %25 = call <vscale x 4 x double> @llvm.vp.nearbyint.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %26 = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
+  call <2 x bfloat> @llvm.vp.nearbyint.v2bf16(<2 x bfloat> undef, <2 x i1> undef, i32 undef)
+  call <4 x bfloat> @llvm.vp.nearbyint.v4bf16(<4 x bfloat> undef, <4 x i1> undef, i32 undef)
+  call <8 x bfloat> @llvm.vp.nearbyint.v8bf16(<8 x bfloat> undef, <8 x i1> undef, i32 undef)
+  call <16 x bfloat> @llvm.vp.nearbyint.v16bf16(<16 x bfloat> undef, <16 x i1> undef, i32 undef)
+  call <vscale x 1 x bfloat> @llvm.vp.nearbyint.nxv1bf16(<vscale x 1 x bfloat> undef, <vscale x 1 x i1> undef, i32 undef)
+  call <vscale x 2 x bfloat> @llvm.vp.nearbyint.nxv2bf16(<vscale x 2 x bfloat> undef, <vscale x 2 x i1> undef, i32 undef)
+  call <vscale x 4 x bfloat> @llvm.vp.nearbyint.nxv4bf16(<vscale x 4 x bfloat> undef, <vscale x 4 x i1> undef, i32 undef)
+  call <vscale x 8 x bfloat> @llvm.vp.nearbyint.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> undef, i32 undef)
+  call <vscale x 16 x bfloat> @llvm.vp.nearbyint.nxv16bf16(<vscale x 16 x bfloat> undef, <vscale x 16 x i1> undef, i32 undef)
   call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef)
   call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef)
   call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
index 588d852d7f26e20..196e7376677a54f 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll
@@ -13,6 +13,12 @@ define void @reduce_fadd_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_bfloat'
@@ -24,6 +30,12 @@ define void @reduce_fadd_bfloat() {
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -34,6 +46,12 @@ define void @reduce_fadd_bfloat() {
   %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef)
   %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef)
   %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef)
+  %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef)
+  %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef)
+  %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef)
+  %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
+  %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
+  %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
   ret void
 }
 
@@ -47,6 +65,12 @@ define void @reduce_fadd_half() {
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fadd_half'
@@ -58,6 +82,12 @@ define void @reduce_fadd_half() {
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_half'
@@ -69,6 +99,12 @@ define void @reduce_fadd_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -79,6 +115,12 @@ define void @reduce_fadd_half() {
   %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef)
   %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef)
   %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef)
+  %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0.0, <vscale x 1 x half> undef)
+  %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0.0, <vscale x 2 x half> undef)
+  %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0.0, <vscale x 4 x half> undef)
+  %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef)
+  %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef)
+  %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef)
   ret void
 }
 
@@ -92,6 +134,11 @@ define void @reduce_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_float'
@@ -103,6 +150,11 @@ define void @reduce_fadd_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -113,6 +165,11 @@ define void @reduce_fadd_float() {
   %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef)
   %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef)
   %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef)
+  %NXV1 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef)
+  %NXV2 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.0, <vscale x 2 x float> undef)
+  %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef)
+  %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef)
+  %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef)
   ret void
 }
 
@@ -126,6 +183,10 @@ define void @reduce_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fadd_double'
@@ -137,6 +198,10 @@ define void @reduce_fadd_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -147,11 +212,15 @@ define void @reduce_fadd_double() {
   %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef)
   %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef)
   %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef)
+  %NXV1 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.0, <vscale x 1 x double> undef)
+  %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef)
+  %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef)
+  %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef)
   ret void
 }
 
-define void @reduce_oredered_fadd_bfloat() {
-; FP-REDUCE-LABEL: 'reduce_oredered_fadd_bfloat'
+define void @reduce_ordered_fadd_bfloat() {
+; FP-REDUCE-LABEL: 'reduce_ordered_fadd_bfloat'
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
@@ -160,9 +229,15 @@ define void @reduce_oredered_fadd_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; SIZE-LABEL: 'reduce_oredered_fadd_bfloat'
+; SIZE-LABEL: 'reduce_ordered_fadd_bfloat'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef)
@@ -171,6 +246,12 @@ define void @reduce_oredered_fadd_bfloat() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -181,11 +262,17 @@ define void @reduce_oredered_fadd_bfloat() {
   %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef)
   %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef)
   %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef)
+  %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef)
+  %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef)
+  %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef)
+  %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
+  %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
+  %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
   ret void
 }
 
-define void @reduce_oredered_fadd_half() {
-; FP-REDUCE-LABEL: 'reduce_oredered_fadd_half'
+define void @reduce_ordered_fadd_half() {
+; FP-REDUCE-LABEL: 'reduce_ordered_fadd_half'
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
@@ -194,9 +281,15 @@ define void @reduce_oredered_fadd_half() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; SIZE-LABEL: 'reduce_oredered_fadd_half'
+; SIZE-LABEL: 'reduce_ordered_fadd_half'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef)
@@ -205,6 +298,12 @@ define void @reduce_oredered_fadd_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef)
@@ -215,11 +314,17 @@ define void @reduce_oredered_fadd_half() {
   %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef)
   %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef)
   %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef)
+  %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0.0, <vscale x 1 x half> undef)
+  %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0.0, <vscale x 2 x half> undef)
+  %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0.0, <vscale x 4 x half> undef)
+  %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, <vscale x 8 x half> undef)
+  %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, <vscale x 16 x half> undef)
+  %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, <vscale x 32 x half> undef)
   ret void
 }
 
-define void @reduce_oredered_fadd_float() {
-; FP-REDUCE-LABEL: 'reduce_oredered_fadd_float'
+define void @reduce_ordered_fadd_float() {
+; FP-REDUCE-LABEL: 'reduce_ordered_fadd_float'
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
@@ -228,9 +333,14 @@ define void @reduce_oredered_fadd_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; SIZE-LABEL: 'reduce_oredered_fadd_float'
+; SIZE-LABEL: 'reduce_ordered_fadd_float'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
@@ -239,6 +349,11 @@ define void @reduce_oredered_fadd_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef)
@@ -249,11 +364,16 @@ define void @reduce_oredered_fadd_float() {
   %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef)
   %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef)
   %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef)
+  %NXV1 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef)
+  %NXV2 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.0, <vscale x 2 x float> undef)
+  %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> undef)
+  %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, <vscale x 8 x float> undef)
+  %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, <vscale x 16 x float> undef)
   ret void
 }
 
-define void @reduce_oredered_fadd_double() {
-; FP-REDUCE-LABEL: 'reduce_oredered_fadd_double'
+define void @reduce_ordered_fadd_double() {
+; FP-REDUCE-LABEL: 'reduce_ordered_fadd_double'
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
@@ -262,9 +382,13 @@ define void @reduce_oredered_fadd_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
-; SIZE-LABEL: 'reduce_oredered_fadd_double'
+; SIZE-LABEL: 'reduce_ordered_fadd_double'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
@@ -273,6 +397,10 @@ define void @reduce_oredered_fadd_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef)
@@ -283,30 +411,9 @@ define void @reduce_oredered_fadd_double() {
   %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef)
   %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef)
   %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef)
+  %NXV1 = call double @llvm.vector.reduce.fadd.nxv1f64(double 0.0, <vscale x 1 x double> undef)
+  %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, <vscale x 2 x double> undef)
+  %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> undef)
+  %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, <vscale x 8 x double> undef)
   ret void
 }
-
-declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)
-declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>)
-declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
-declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
-declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
-declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>)
-declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>)
-declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>)
-declare float @llvm.vector.reduce.fadd.v1f32(float, <1 x float>)
-declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
-declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
-declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
-declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)
-declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>)
-declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>)
-declare float @llvm.vector.reduce.fadd.v128f32(float, <128 x float>)
-declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>)
-declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
-declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
-declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>)
-declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>)
-declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>)
-declare double @llvm.vector.reduce.fadd.v64f64(double, <64 x double>)
-declare double @llvm.vector.reduce.fadd.v128f64(double, <128 x double>)
diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
index 162562c7b89310d..211bcb1343eea40 100644
--- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll
@@ -13,6 +13,12 @@ define void @reduce_fmul_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_bfloat'
@@ -24,6 +30,12 @@ define void @reduce_fmul_bfloat() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -34,6 +46,12 @@ define void @reduce_fmul_bfloat() {
   %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef)
   %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef)
   %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef)
+  %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef)
+  %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef)
+  %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef)
+  %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
+  %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
+  %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
   ret void
 }
 
@@ -47,6 +65,12 @@ define void @reduce_fmul_half() {
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 151 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; FP-REDUCE-ZVFH-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; FP-REDUCE-ZVFH-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fmul_half'
@@ -58,6 +82,12 @@ define void @reduce_fmul_half() {
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; FP-REDUCE-ZVFHMIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_half'
@@ -69,6 +99,12 @@ define void @reduce_fmul_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
@@ -79,6 +115,12 @@ define void @reduce_fmul_half() {
   %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef)
   %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef)
   %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef)
+  %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0.0, <vscale x 1 x half> undef)
+  %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0.0, <vscale x 2 x half> undef)
+  %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0.0, <vscale x 4 x half> undef)
+  %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef)
+  %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef)
+  %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef)
   ret void
 }
 
@@ -92,6 +134,11 @@ define void @reduce_fmul_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 451 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 483 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 547 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_float'
@@ -103,6 +150,11 @@ define void @reduce_fmul_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
@@ -113,6 +165,11 @@ define void @reduce_fmul_float() {
   %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef)
   %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef)
   %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef)
+  %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.0, <vscale x 1 x float> undef)
+  %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.0, <vscale x 2 x float> undef)
+  %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef)
+  %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef)
+  %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef)
   ret void
 }
 
@@ -126,6 +183,10 @@ define void @reduce_fmul_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 393 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 457 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 585 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_fmul_double'
@@ -137,6 +198,10 @@ define void @reduce_fmul_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
@@ -147,6 +212,10 @@ define void @reduce_fmul_double() {
   %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef)
   %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef)
   %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef)
+  %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.0, <vscale x 1 x double> undef)
+  %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef)
+  %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef)
+  %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef)
   ret void
 }
 
@@ -160,6 +229,12 @@ define void @reduce_ordered_fmul_bfloat() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_bfloat'
@@ -171,6 +246,12 @@ define void @reduce_ordered_fmul_bfloat() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, <vscale x 1 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, <vscale x 2 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, <vscale x 4 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, <vscale x 8 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, <vscale x 16 x bfloat> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, <vscale x 32 x bfloat> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef)
@@ -181,6 +262,12 @@ define void @reduce_ordered_fmul_bfloat() {
   %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef)
   %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef)
   %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef)
+  %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0.0, <vscale x 1 x bfloat> undef)
+  %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0.0, <vscale x 2 x bfloat> undef)
+  %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0.0, <vscale x 4 x bfloat> undef)
+  %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, <vscale x 8 x bfloat> undef)
+  %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, <vscale x 16 x bfloat> undef)
+  %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, <vscale x 32 x bfloat> undef)
   ret void
 }
 
@@ -194,6 +281,12 @@ define void @reduce_ordered_fmul_half() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_half'
@@ -205,6 +298,12 @@ define void @reduce_ordered_fmul_half() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 191 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 382 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, <vscale x 1 x half> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, <vscale x 2 x half> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, <vscale x 4 x half> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, <vscale x 8 x half> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, <vscale x 16 x half> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, <vscale x 32 x half> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef)
@@ -215,6 +314,12 @@ define void @reduce_ordered_fmul_half() {
   %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef)
   %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef)
   %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef)
+  %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0.0, <vscale x 1 x half> undef)
+  %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0.0, <vscale x 2 x half> undef)
+  %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0.0, <vscale x 4 x half> undef)
+  %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, <vscale x 8 x half> undef)
+  %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, <vscale x 16 x half> undef)
+  %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, <vscale x 32 x half> undef)
   ret void
 }
 
@@ -228,6 +333,11 @@ define void @reduce_ordered_fmul_float() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 127 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 254 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 508 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_float'
@@ -239,6 +349,11 @@ define void @reduce_ordered_fmul_float() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 95 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 190 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 380 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, <vscale x 2 x float> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, <vscale x 4 x float> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, <vscale x 8 x float> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, <vscale x 16 x float> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef)
@@ -249,6 +364,11 @@ define void @reduce_ordered_fmul_float() {
   %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef)
   %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef)
   %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef)
+  %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.0, <vscale x 1 x float> undef)
+  %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.0, <vscale x 2 x float> undef)
+  %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, <vscale x 4 x float> undef)
+  %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, <vscale x 8 x float> undef)
+  %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, <vscale x 16 x float> undef)
   ret void
 }
 
@@ -262,6 +382,10 @@ define void @reduce_ordered_fmul_double() {
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 126 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 252 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 504 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
+; FP-REDUCE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 ; FP-REDUCE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SIZE-LABEL: 'reduce_ordered_fmul_double'
@@ -273,6 +397,10 @@ define void @reduce_ordered_fmul_double() {
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 94 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 188 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 376 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, <vscale x 1 x double> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, <vscale x 2 x double> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, <vscale x 4 x double> undef)
+; SIZE-NEXT:  Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, <vscale x 8 x double> undef)
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef)
@@ -283,5 +411,9 @@ define void @reduce_ordered_fmul_double() {
   %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef)
   %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef)
   %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef)
+  %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.0, <vscale x 1 x double> undef)
+  %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, <vscale x 2 x double> undef)
+  %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, <vscale x 4 x double> undef)
+  %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, <vscale x 8 x double> undef)
   ret void
 }
diff --git a/llvm/test/Analysis/StackSafetyAnalysis/local.ll b/llvm/test/Analysis/StackSafetyAnalysis/local.ll
index 4a833611c78916e..02d46c8449bae53 100644
--- a/llvm/test/Analysis/StackSafetyAnalysis/local.ll
+++ b/llvm/test/Analysis/StackSafetyAnalysis/local.ll
@@ -1120,5 +1120,21 @@ define void @NonPointer(ptr %p) {
   ret void
 }
 
+@ifunc = dso_local ifunc i64 (ptr), ptr @ifunc_resolver
+
+define dso_local void @CallIfunc(ptr noundef %uaddr) local_unnamed_addr {
+; CHECK-LABEL: @CallIfunc
+; CHECK-NEXT:  args uses:
+; CHECK-NEXT:    uaddr[]: full-set
+entry:
+  tail call i64 @ifunc(ptr noundef %uaddr)
+  ret void
+}
+
+define dso_local ptr @ifunc_resolver() {
+entry:
+  ret ptr null
+}
+
 declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
 declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
diff --git a/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll b/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll
index 5936199bf32f437..3c23b54d2973697 100644
--- a/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll
+++ b/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll
@@ -1,17 +1,21 @@
 ; RUN: opt -passes='print<structural-hash>' -disable-output %s 2>&1 | FileCheck %s
 ; RUN: opt -passes='print<structural-hash><detailed>' -disable-output %s 2>&1 | FileCheck %s -check-prefix=DETAILED-HASH
+; RUN: opt -passes='print<structural-hash><call-target-ignored>' -disable-output %s 2>&1 | FileCheck %s -check-prefix=CALLTARGETIGNORED-HASH
 
 ; Add a declaration so that we can test we skip it.
-declare i64 @d1()
+declare i64 @d1(i64)
+declare i64 @e1(i64)
 
 define i64 @f1(i64 %a) {
 	%b = add i64 %a, 1
-	ret i64 %b
+	%c = call i64 @d1(i64 %b)
+	ret i64 %c
 }
 
-define i32 @f2(i32 %a) {
-	%b = add i32 %a, 2
-	ret i32 %b
+define i64 @f2(i64 %a) {
+	%b = add i64 %a, 1
+	%c = call i64 @e1(i64 %b)
+	ret i64 %c
 }
 
 ; CHECK: Module Hash: {{([a-f0-9]{16,})}}
@@ -22,3 +26,13 @@ define i32 @f2(i32 %a) {
 ; DETAILED-HASH-NEXT: Function f1 Hash: [[DF1H:([a-f0-9]{16,})]]
 ; DETAILED-HASH-NOT: [[DF1H]]
 ; DETAILED-HASH-NEXT: Function f2 Hash: {{([a-f0-9]{16,})}}
+
+; When ignoring the call target, check if `f1` and `f2` produce the same function hash.
+; The index for the call instruction is 1, and the index of the call target operand is 1.
+; The ignored operand hashes for different call targets should be different.
+; CALLTARGETIGNORED-HASH: Module Hash: {{([a-f0-9]{16,})}}
+; CALLTARGETIGNORED-HASH-NEXT: Function f1 Hash: [[IF1H:([a-f0-9]{16,})]]
+; CALLTARGETIGNORED-HASH-NEXT:   Ignored Operand Hash: [[IO1H:([a-f0-9]{16,})]] at (1,1)
+; CALLTARGETIGNORED-HASH-NEXT: Function f2 Hash: [[IF1H]]
+; CALLTARGETIGNORED-HASH-NOT: [[IO1H]]
+; CALLTARGETIGNORED-HASH-NEXT:   Ignored Operand Hash: {{([a-f0-9]{16,})}} at (1,1)
diff --git a/llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll b/llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll
index 95ac98532da6214..5beb0c7cadfbaa2 100644
--- a/llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll
+++ b/llvm/test/Analysis/ValueTracking/numsignbits-from-assume.ll
@@ -1,8 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S | FileCheck %s
-
-; FIXME: This does not currently reach a fix point, because an assume can only
-; be propagated backwards after its argument has been simplified.
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 define i32 @computeNumSignBits_add1(i32 %in) {
 ; CHECK-LABEL: @computeNumSignBits_add1(
diff --git a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
index 584c0ef7cfeb785..5cc3a30277459b9 100644
--- a/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
+++ b/llvm/test/Assembler/auto_upgrade_nvvm_intrinsics.ll
@@ -44,6 +44,13 @@ declare ptr @llvm.nvvm.ptr.shared.to.gen.p0.p3(ptr addrspace(3))
 declare ptr @llvm.nvvm.ptr.constant.to.gen.p0.p4(ptr addrspace(4))
 declare ptr @llvm.nvvm.ptr.local.to.gen.p0.p5(ptr addrspace(5))
 
+declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1), i32)
+declare ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1), i32)
+declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1), i32)
+declare i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr, i32)
+declare ptr @llvm.nvvm.ldg.global.p.p0(ptr, i32)
+declare float @llvm.nvvm.ldg.global.f.f32.p0(ptr, i32)
+
 ; CHECK-LABEL: @simple_upgrade
 define void @simple_upgrade(i32 %a, i64 %b, i16 %c) {
 ; CHECK: call i32 @llvm.bitreverse.i32(i32 %a)
@@ -191,3 +198,27 @@ define void @addrspacecast(ptr %p0) {
 
   ret void
 }
+
+; CHECK-LABEL: @ldg
+define void @ldg(ptr %p0, ptr addrspace(1) %p1) {
+; CHECK: %1 = load i32, ptr addrspace(1) %p1, align 4, !invariant.load !0
+; CHECK: %2 = load ptr, ptr addrspace(1) %p1, align 8, !invariant.load !0
+; CHECK: %3 = load float, ptr addrspace(1) %p1, align 16, !invariant.load !0
+
+; CHECK: %4 = addrspacecast ptr %p0 to ptr addrspace(1)
+; CHECK: %5 = load i32, ptr addrspace(1) %4, align 4, !invariant.load !0
+; CHECK: %6 = addrspacecast ptr %p0 to ptr addrspace(1)
+; CHECK: %7 = load ptr, ptr addrspace(1) %6, align 8, !invariant.load !0
+; CHECK: %8 = addrspacecast ptr %p0 to ptr addrspace(1)
+; CHECK: %9 = load float, ptr addrspace(1) %8, align 16, !invariant.load !0
+;
+  %v1 = call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %p1, i32 4)
+  %v2 = call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %p1, i32 8 )
+  %v3 = call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %p1, i32 16)
+
+  %v4 = call i32 @llvm.nvvm.ldg.global.i.i32.p0(ptr %p0, i32 4)
+  %v5 = call ptr @llvm.nvvm.ldg.global.p.p0(ptr %p0, i32 8)
+  %v6 = call float @llvm.nvvm.ldg.global.f.f32.p0(ptr %p0, i32 16)
+
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/test/Assembler/invalid-inttype.ll b/llvm/test/Assembler/invalid-inttype.ll
index c8aa7c66b79e4dd..9e3c31148af2d6a 100644
--- a/llvm/test/Assembler/invalid-inttype.ll
+++ b/llvm/test/Assembler/invalid-inttype.ll
@@ -1,5 +1,5 @@
 ; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s
 
 ; i8388609 is the smallest integer type that can't be represented in LLVM IR
-; CHECK: [[FILE]]:[[@LINE+1]]:21: error: bitwidth for integer type out of range!
+; CHECK: [[FILE]]:[[@LINE+1]]:21: error: bitwidth for integer type out of range
 @i2 = common global i8388609 0, align 4
diff --git a/llvm/test/Assembler/invalid-name.ll b/llvm/test/Assembler/invalid-name.ll
index 74133e60df54d59..52e2bda3adbabde 100644
Binary files a/llvm/test/Assembler/invalid-name.ll and b/llvm/test/Assembler/invalid-name.ll differ
diff --git a/llvm/test/Assembler/invalid-name2.ll b/llvm/test/Assembler/invalid-name2.ll
index 8a848798a54cafe..78da4dc3d1b8d04 100644
Binary files a/llvm/test/Assembler/invalid-name2.ll and b/llvm/test/Assembler/invalid-name2.ll differ
diff --git a/llvm/test/Bindings/llvm-c/debug_info_new_format.ll b/llvm/test/Bindings/llvm-c/debug_info_new_format.ll
index e7f537aa4f1a9aa..a3f4bb2421fa260 100644
--- a/llvm/test/Bindings/llvm-c/debug_info_new_format.ll
+++ b/llvm/test/Bindings/llvm-c/debug_info_new_format.ll
@@ -6,15 +6,18 @@
 
 ; CHECK:      define i64 @foo(i64 %0, i64 %1, <10 x i64> %2) !dbg !31 {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:     #dbg_declare(i64 0, !38, !DIExpression(), !44)
-; CHECK-NEXT:     #dbg_declare(i64 0, !39, !DIExpression(), !44)
-; CHECK-NEXT:     #dbg_declare(i64 0, !40, !DIExpression(), !44)
+; CHECK-NEXT:     #dbg_declare(i64 0, !38, !DIExpression(), !45)
+; CHECK-NEXT:     #dbg_declare(i64 0, !39, !DIExpression(), !45)
+; CHECK-NEXT:     #dbg_declare(i64 0, !40, !DIExpression(), !45)
+; CHECK-NEXT:     #dbg_label(!46, !45)
+; CHECK-NEXT:   br label %vars
+; CHECK-NEXT:     #dbg_label(!47, !45)
 ; CHECK-NEXT:   br label %vars
 ; CHECK:      vars:
 ; CHECK-NEXT:   %p1 = phi i64 [ 0, %entry ]
 ; CHECK-NEXT:   %p2 = phi i64 [ 0, %entry ]
-; CHECK-NEXT:     #dbg_value(i64 0, !41, !DIExpression(DW_OP_constu, 0, DW_OP_stack_value), !45)
-; CHECK-NEXT:     #dbg_value(i64 1, !43, !DIExpression(DW_OP_constu, 1, DW_OP_stack_value), !45)
+; CHECK-NEXT:     #dbg_value(i64 0, !41, !DIExpression(DW_OP_constu, 0, DW_OP_stack_value), !48)
+; CHECK-NEXT:     #dbg_value(i64 1, !43, !DIExpression(DW_OP_constu, 1, DW_OP_stack_value), !48)
 ; CHECK-NEXT:   %a = add i64 %p1, %p2
 ; CHECK-NEXT:   ret i64 0
 ; CHECK-NEXT: }
@@ -60,12 +63,15 @@
 ; CHECK-NEXT: !34 = !DICompositeType(tag: DW_TAG_array_type, baseType: !6, size: 640, flags: DIFlagVector, elements: !35)
 ; CHECK-NEXT: !35 = !{!36}
 ; CHECK-NEXT: !36 = !DISubrange(count: 10, lowerBound: 0)
-; CHECK-NEXT: !37 = !{!38, !39, !40, !41, !43}
+; CHECK-NEXT: !37 = !{!38, !39, !40, !41, !43, !44}
 ; CHECK-NEXT: !38 = !DILocalVariable(name: "a", arg: 1, scope: !31, file: !1, line: 42, type: !6)
 ; CHECK-NEXT: !39 = !DILocalVariable(name: "b", arg: 2, scope: !31, file: !1, line: 42, type: !6)
 ; CHECK-NEXT: !40 = !DILocalVariable(name: "c", arg: 3, scope: !31, file: !1, line: 42, type: !34)
 ; CHECK-NEXT: !41 = !DILocalVariable(name: "d", scope: !42, file: !1, line: 43, type: !6)
 ; CHECK-NEXT: !42 = distinct !DILexicalBlock(scope: !31, file: !1, line: 42)
 ; CHECK-NEXT: !43 = !DILocalVariable(name: "e", scope: !42, file: !1, line: 44, type: !6)
-; CHECK-NEXT: !44 = !DILocation(line: 42, scope: !31)
-; CHECK-NEXT: !45 = !DILocation(line: 43, scope: !31)
+; CHECK-NEXT: !44 = !DILabel(scope: !31, name: "label3", file: !1, line: 42)
+; CHECK-NEXT: !45 = !DILocation(line: 42, scope: !31)
+; CHECK-NEXT: !46 = !DILabel(scope: !31, name: "label1", file: !1, line: 42)
+; CHECK-NEXT: !47 = !DILabel(scope: !31, name: "label2", file: !1, line: 42)
+; CHECK-NEXT: !48 = !DILocation(line: 43, scope: !31)
diff --git a/llvm/test/Bitcode/attributes.ll b/llvm/test/Bitcode/attributes.ll
index 737f49aa86a7ba2..492de663884df4a 100644
--- a/llvm/test/Bitcode/attributes.ll
+++ b/llvm/test/Bitcode/attributes.ll
@@ -512,7 +512,7 @@ define void @f92() sanitize_realtime
 }
 
 ; CHECK: define void @f93() #54
-define void @f93() sanitize_realtime_unsafe {
+define void @f93() sanitize_realtime_blocking {
         ret void;
 }
 
@@ -616,7 +616,7 @@ define void @initializes(ptr initializes((-4, 0), (4, 8)) %a) {
 ; CHECK: attributes #51 = { uwtable(sync) }
 ; CHECK: attributes #52 = { nosanitize_bounds }
 ; CHECK: attributes #53 = { sanitize_realtime }
-; CHECK: attributes #54 = { sanitize_realtime_unsafe }
+; CHECK: attributes #54 = { sanitize_realtime_blocking }
 ; CHECK: attributes [[FNRETTHUNKEXTERN]] = { fn_ret_thunk_extern }
 ; CHECK: attributes [[SKIPPROFILE]] = { skipprofile }
 ; CHECK: attributes [[OPTDEBUG]] = { optdebug }
diff --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index 280c3a99d7535f8..a849789da536ace 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -2048,8 +2048,8 @@ declare void @f.sanitize_numerical_stability() sanitize_numerical_stability
 declare void @f.sanitize_realtime() sanitize_realtime
 ; CHECK: declare void @f.sanitize_realtime() #52
 
-declare void @f.sanitize_realtime_unsafe() sanitize_realtime_unsafe
-; CHECK: declare void @f.sanitize_realtime_unsafe() #53
+declare void @f.sanitize_realtime_blocking() sanitize_realtime_blocking
+; CHECK: declare void @f.sanitize_realtime_blocking() #53
 
 ; CHECK: declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan))
 declare nofpclass(snan) float @nofpclass_snan(float nofpclass(snan))
@@ -2183,7 +2183,7 @@ define float @nofpclass_callsites(float %arg, { float } %arg1) {
 ; CHECK: attributes #50 = { allockind("alloc,uninitialized") }
 ; CHECK: attributes #51 = { sanitize_numerical_stability }
 ; CHECK: attributes #52 = { sanitize_realtime }
-; CHECK: attributes #53 = { sanitize_realtime_unsafe }
+; CHECK: attributes #53 = { sanitize_realtime_blocking }
 ; CHECK: attributes #54 = { builtin }
 
 ;; Metadata
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll
new file mode 100644
index 000000000000000..69cd6ce87b5c6b7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-sincos.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -O0 -mtriple=aarch64-linux-gnu -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
+
+define { half, half } @test_sincos_f16(half %a) {
+  ; CHECK-LABEL: name: test_sincos_f16
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $h0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s16) = COPY $h0
+  ; CHECK-NEXT:   [[FSINCOS:%[0-9]+]]:_(s16), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]]
+  ; CHECK-NEXT:   $h0 = COPY [[FSINCOS]](s16)
+  ; CHECK-NEXT:   $h1 = COPY [[FSINCOS1]](s16)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $h0, implicit $h1
+  %result = call { half, half } @llvm.sincos.f16(half %a)
+  ret { half, half } %result
+}
+
+define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
+  ; CHECK-LABEL: name: test_sincos_v2f16
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0
+  ; CHECK-NEXT:   [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
+  ; CHECK-NEXT:   [[FSINCOS:%[0-9]+]]:_(<2 x s16>), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[UV]]
+  ; CHECK-NEXT:   [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FSINCOS]](<2 x s16>)
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV2]](s16), [[UV3]](s16), [[DEF]](s16), [[DEF]](s16)
+  ; CHECK-NEXT:   [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FSINCOS1]](<2 x s16>)
+  ; CHECK-NEXT:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[DEF]](s16), [[DEF]](s16)
+  ; CHECK-NEXT:   $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
+  ; CHECK-NEXT:   $d1 = COPY [[BUILD_VECTOR1]](<4 x s16>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0, implicit $d1
+  %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+  ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_sincos_f32(float %a) {
+  ; CHECK-LABEL: name: test_sincos_f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FSINCOS:%[0-9]+]]:_(s32), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FSINCOS]](s32)
+  ; CHECK-NEXT:   $s1 = COPY [[FSINCOS1]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0, implicit $s1
+  %result = call { float, float } @llvm.sincos.f32(float %a)
+  ret { float, float } %result
+}
+
+define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
+  ; CHECK-LABEL: name: test_sincos_v2f32
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
+  ; CHECK-NEXT:   [[FSINCOS:%[0-9]+]]:_(<2 x s32>), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]]
+  ; CHECK-NEXT:   $d0 = COPY [[FSINCOS]](<2 x s32>)
+  ; CHECK-NEXT:   $d1 = COPY [[FSINCOS1]](<2 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0, implicit $d1
+  %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+  ret { <2 x float>, <2 x float> } %result
+}
+
+define { double, double } @test_sincos_f64(double %a) {
+  ; CHECK-LABEL: name: test_sincos_f64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $d0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+  ; CHECK-NEXT:   [[FSINCOS:%[0-9]+]]:_(s64), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]]
+  ; CHECK-NEXT:   $d0 = COPY [[FSINCOS]](s64)
+  ; CHECK-NEXT:   $d1 = COPY [[FSINCOS1]](s64)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $d0, implicit $d1
+  %result = call { double, double } @llvm.sincos.f64(double %a)
+  ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
+  ; CHECK-LABEL: name: test_sincos_v2f64
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
+  ; CHECK-NEXT:   [[FSINCOS:%[0-9]+]]:_(<2 x s64>), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]]
+  ; CHECK-NEXT:   $q0 = COPY [[FSINCOS]](<2 x s64>)
+  ; CHECK-NEXT:   $q1 = COPY [[FSINCOS1]](<2 x s64>)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $q0, implicit $q1
+  %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+  ret { <2 x double>, <2 x double> } %result
+}
+
+define { fp128, fp128 } @test_sincos_f128(fp128 %a) {
+  ; CHECK-LABEL: name: test_sincos_f128
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $q0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s128) = COPY $q0
+  ; CHECK-NEXT:   [[FSINCOS:%[0-9]+]]:_(s128), [[FSINCOS1:%[0-9]+]]:_ = G_FSINCOS [[COPY]]
+  ; CHECK-NEXT:   $q0 = COPY [[FSINCOS]](s128)
+  ; CHECK-NEXT:   $q1 = COPY [[FSINCOS1]](s128)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $q0, implicit $q1
+  %result = call { fp128, fp128 } @llvm.sincos.f128(fp128 %a)
+  ret { fp128, fp128 } %result
+}
+
+define { float, float } @test_sincos_f32_afn(float %a) {
+  ; CHECK-LABEL: name: test_sincos_f32_afn
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $s0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $s0
+  ; CHECK-NEXT:   [[FSINCOS:%[0-9]+]]:_(s32), [[FSINCOS1:%[0-9]+]]:_ = afn G_FSINCOS [[COPY]]
+  ; CHECK-NEXT:   $s0 = COPY [[FSINCOS]](s32)
+  ; CHECK-NEXT:   $s1 = COPY [[FSINCOS1]](s32)
+  ; CHECK-NEXT:   RET_ReallyLR implicit $s0, implicit $s1
+  %result = call afn { float, float } @llvm.sincos.f32(float %a)
+  ret { float, float } %result
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index 80b6e4f6d528a2a..6be99d0088f1cb9 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -24,7 +24,6 @@
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 #
 # DEBUG-NEXT: G_MUL (opcode {{[0-9]+}}): 1 type index, 0 imm indices
-# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 #
@@ -701,6 +700,9 @@
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
+# DEBUG-NEXT: G_FSINCOS (opcode {{[0-9]+}}): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_FTAN (opcode {{[0-9]+}}): 1 type index, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 1, OK
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll b/llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll
new file mode 100644
index 000000000000000..bc51cf7bac23c95
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll
@@ -0,0 +1,208 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1  | FileCheck %s
+; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s
+
+;; add
+define <vscale x 2 x i64> @addnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: addnxv2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = add <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %c
+}
+
+define <vscale x 4 x i32> @addnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: addnxv4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+entry:
+  %c = add <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 8 x i16> @addnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: addnxv8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+entry:
+  %c = add <vscale x 8 x i16> %a, %b
+  ret <vscale x 8 x i16> %c
+}
+
+define <vscale x 16 x i8> @addnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: addnxv16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+entry:
+  %c = add <vscale x 16 x i8> %a, %b
+  ret <vscale x 16 x i8> %c
+}
+
+;; sub
+define <vscale x 2 x i64> @subnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: subnxv2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = sub <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %c
+}
+
+define <vscale x 4 x i32> @subnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: subnxv4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+entry:
+  %c = sub <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 8 x i16> @subnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: subnxv8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
+entry:
+  %c = sub <vscale x 8 x i16> %a, %b
+  ret <vscale x 8 x i16> %c
+}
+
+define <vscale x 16 x i8> @subnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: subnxv16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
+entry:
+  %c = sub <vscale x 16 x i8> %a, %b
+  ret <vscale x 16 x i8> %c
+}
+
+;; and (bitwise op: every element type below is checked against the .d form)
+define <vscale x 2 x i64> @andnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: andnxv2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = and <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %c
+}
+
+define <vscale x 4 x i32> @andnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: andnxv4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = and <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 8 x i16> @andnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: andnxv8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = and <vscale x 8 x i16> %a, %b
+  ret <vscale x 8 x i16> %c
+}
+
+define <vscale x 16 x i8> @andnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: andnxv16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    and z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = and <vscale x 16 x i8> %a, %b
+  ret <vscale x 16 x i8> %c
+}
+
+;; or (bitwise op: every element type below is checked against the .d form of orr)
+define <vscale x 2 x i64> @ornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: ornxv2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = or <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %c
+}
+
+define <vscale x 4 x i32> @ornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ornxv4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = or <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 8 x i16> @ornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: ornxv8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = or <vscale x 8 x i16> %a, %b
+  ret <vscale x 8 x i16> %c
+}
+
+define <vscale x 16 x i8> @ornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: ornxv16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    orr z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = or <vscale x 16 x i8> %a, %b
+  ret <vscale x 16 x i8> %c
+}
+
+;; xor (bitwise op: every element type below is checked against the .d form of eor)
+define <vscale x 2 x i64> @xornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: xornxv2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = xor <vscale x 2 x i64> %a, %b
+  ret <vscale x 2 x i64> %c
+}
+
+define <vscale x 4 x i32> @xornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: xornxv4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = xor <vscale x 4 x i32> %a, %b
+  ret <vscale x 4 x i32> %c
+}
+
+define <vscale x 8 x i16> @xornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
+; CHECK-LABEL: xornxv8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = xor <vscale x 8 x i16> %a, %b
+  ret <vscale x 8 x i16> %c
+}
+
+define <vscale x 16 x i8> @xornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: xornxv16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %c = xor <vscale x 16 x i8> %a, %b
+  ret <vscale x 16 x i8> %c
+}
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index d677526bab00050..11397703b4442e8 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -4,16 +4,7 @@
 ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
 ; CHECK-GI:       warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for smlsl2_v8i16_uzp1
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for umlsl2_v8i16_uzp1
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for smlsl2_v4i32_uzp1
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for umlsl2_v4i32_uzp1
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for pmlsl_pmlsl2_v8i16_uzp1
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for smlsl_smlsl2_v8i16_uzp1
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for umlsl_umlsl2_v8i16_uzp1
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for smlsl_smlsl2_v4i32_uzp1
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for umlsl_umlsl2_v4i32_uzp1
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for do_stuff
 
 define <8 x i16> @smull_v8i8_v8i16(ptr %A, ptr %B) nounwind {
 ; CHECK-LABEL: smull_v8i8_v8i16:
@@ -2025,13 +2016,30 @@ define void @pmlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) {
 }
 
 define void @smlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) {
-; CHECK-LABEL: smlsl2_v8i16_uzp1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr q2, [x1, #16]
-; CHECK-NEXT:    uzp1 v2.16b, v0.16b, v2.16b
-; CHECK-NEXT:    smlsl2 v1.8h, v0.16b, v2.16b
-; CHECK-NEXT:    str q1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-NEON-LABEL: smlsl2_v8i16_uzp1:
+; CHECK-NEON:       // %bb.0:
+; CHECK-NEON-NEXT:    ldr q2, [x1, #16]
+; CHECK-NEON-NEXT:    uzp1 v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    smlsl2 v1.8h, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    str q1, [x0]
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-SVE-LABEL: smlsl2_v8i16_uzp1:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    ldr q2, [x1, #16]
+; CHECK-SVE-NEXT:    uzp1 v2.16b, v0.16b, v2.16b
+; CHECK-SVE-NEXT:    smlsl2 v1.8h, v0.16b, v2.16b
+; CHECK-SVE-NEXT:    str q1, [x0]
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-GI-LABEL: smlsl2_v8i16_uzp1:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ldr q2, [x1, #16]
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    xtn v2.8b, v2.8h
+; CHECK-GI-NEXT:    umlsl v1.8h, v0.8b, v2.8b
+; CHECK-GI-NEXT:    str q1, [x0]
+; CHECK-GI-NEXT:    ret
   %5 = getelementptr inbounds i32, ptr %3, i64 4
   %6 = load <8 x i16>, ptr %5, align 4
   %7 = trunc <8 x i16> %6 to <8 x i8>
@@ -2043,13 +2051,30 @@ define void @smlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) {
 }
 
 define void @umlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) {
-; CHECK-LABEL: umlsl2_v8i16_uzp1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr q2, [x1, #16]
-; CHECK-NEXT:    uzp1 v2.16b, v0.16b, v2.16b
-; CHECK-NEXT:    umlsl2 v1.8h, v0.16b, v2.16b
-; CHECK-NEXT:    str q1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-NEON-LABEL: umlsl2_v8i16_uzp1:
+; CHECK-NEON:       // %bb.0:
+; CHECK-NEON-NEXT:    ldr q2, [x1, #16]
+; CHECK-NEON-NEXT:    uzp1 v2.16b, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    umlsl2 v1.8h, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    str q1, [x0]
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-SVE-LABEL: umlsl2_v8i16_uzp1:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    ldr q2, [x1, #16]
+; CHECK-SVE-NEXT:    uzp1 v2.16b, v0.16b, v2.16b
+; CHECK-SVE-NEXT:    umlsl2 v1.8h, v0.16b, v2.16b
+; CHECK-SVE-NEXT:    str q1, [x0]
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-GI-LABEL: umlsl2_v8i16_uzp1:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ldr q2, [x1, #16]
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    xtn v2.8b, v2.8h
+; CHECK-GI-NEXT:    smlsl v1.8h, v0.8b, v2.8b
+; CHECK-GI-NEXT:    str q1, [x0]
+; CHECK-GI-NEXT:    ret
   %5 = getelementptr inbounds i32, ptr %3, i64 4
   %6 = load <8 x i16>, ptr %5, align 4
   %7 = trunc <8 x i16> %6 to <8 x i8>
@@ -2061,13 +2086,30 @@ define void @umlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3) {
 }
 
 define void @smlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) {
-; CHECK-LABEL: smlsl2_v4i32_uzp1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr q2, [x1, #16]
-; CHECK-NEXT:    uzp1 v2.8h, v0.8h, v2.8h
-; CHECK-NEXT:    smlsl2 v1.4s, v0.8h, v2.8h
-; CHECK-NEXT:    str q1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-NEON-LABEL: smlsl2_v4i32_uzp1:
+; CHECK-NEON:       // %bb.0:
+; CHECK-NEON-NEXT:    ldr q2, [x1, #16]
+; CHECK-NEON-NEXT:    uzp1 v2.8h, v0.8h, v2.8h
+; CHECK-NEON-NEXT:    smlsl2 v1.4s, v0.8h, v2.8h
+; CHECK-NEON-NEXT:    str q1, [x0]
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-SVE-LABEL: smlsl2_v4i32_uzp1:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    ldr q2, [x1, #16]
+; CHECK-SVE-NEXT:    uzp1 v2.8h, v0.8h, v2.8h
+; CHECK-SVE-NEXT:    smlsl2 v1.4s, v0.8h, v2.8h
+; CHECK-SVE-NEXT:    str q1, [x0]
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-GI-LABEL: smlsl2_v4i32_uzp1:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ldr q2, [x1, #16]
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    xtn v2.4h, v2.4s
+; CHECK-GI-NEXT:    umlsl v1.4s, v0.4h, v2.4h
+; CHECK-GI-NEXT:    str q1, [x0]
+; CHECK-GI-NEXT:    ret
   %5 = getelementptr inbounds i32, ptr %3, i64 4
   %6 = load <4 x i32>, ptr %5, align 4
   %7 = trunc <4 x i32> %6 to <4 x i16>
@@ -2079,13 +2121,30 @@ define void @smlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) {
 }
 
 define void @umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3) {
-; CHECK-LABEL: umlsl2_v4i32_uzp1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr q2, [x1, #16]
-; CHECK-NEXT:    uzp1 v2.8h, v0.8h, v2.8h
-; CHECK-NEXT:    umlsl2 v1.4s, v0.8h, v2.8h
-; CHECK-NEXT:    str q1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-NEON-LABEL: umlsl2_v4i32_uzp1:
+; CHECK-NEON:       // %bb.0:
+; CHECK-NEON-NEXT:    ldr q2, [x1, #16]
+; CHECK-NEON-NEXT:    uzp1 v2.8h, v0.8h, v2.8h
+; CHECK-NEON-NEXT:    umlsl2 v1.4s, v0.8h, v2.8h
+; CHECK-NEON-NEXT:    str q1, [x0]
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-SVE-LABEL: umlsl2_v4i32_uzp1:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    ldr q2, [x1, #16]
+; CHECK-SVE-NEXT:    uzp1 v2.8h, v0.8h, v2.8h
+; CHECK-SVE-NEXT:    umlsl2 v1.4s, v0.8h, v2.8h
+; CHECK-SVE-NEXT:    str q1, [x0]
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-GI-LABEL: umlsl2_v4i32_uzp1:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    ldr q2, [x1, #16]
+; CHECK-GI-NEXT:    mov d0, v0.d[1]
+; CHECK-GI-NEXT:    xtn v2.4h, v2.4s
+; CHECK-GI-NEXT:    smlsl v1.4s, v0.4h, v2.4h
+; CHECK-GI-NEXT:    str q1, [x0]
+; CHECK-GI-NEXT:    ret
   %5 = getelementptr inbounds i32, ptr %3, i64 4
   %6 = load <4 x i32>, ptr %5, align 4
   %7 = trunc <4 x i32> %6 to <4 x i16>
@@ -2124,14 +2183,35 @@ entry:
 }
 
 define void @smlsl_smlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) {
-; CHECK-LABEL: smlsl_smlsl2_v8i16_uzp1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldp q2, q3, [x1]
-; CHECK-NEXT:    uzp1 v2.16b, v2.16b, v3.16b
-; CHECK-NEXT:    smlsl v1.8h, v0.8b, v2.8b
-; CHECK-NEXT:    smlsl2 v1.8h, v0.16b, v2.16b
-; CHECK-NEXT:    str q1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-NEON-LABEL: smlsl_smlsl2_v8i16_uzp1:
+; CHECK-NEON:       // %bb.0: // %entry
+; CHECK-NEON-NEXT:    ldp q2, q3, [x1]
+; CHECK-NEON-NEXT:    uzp1 v2.16b, v2.16b, v3.16b
+; CHECK-NEON-NEXT:    smlsl v1.8h, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    smlsl2 v1.8h, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    str q1, [x0]
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-SVE-LABEL: smlsl_smlsl2_v8i16_uzp1:
+; CHECK-SVE:       // %bb.0: // %entry
+; CHECK-SVE-NEXT:    ldp q2, q3, [x1]
+; CHECK-SVE-NEXT:    uzp1 v2.16b, v2.16b, v3.16b
+; CHECK-SVE-NEXT:    smlsl v1.8h, v0.8b, v2.8b
+; CHECK-SVE-NEXT:    smlsl2 v1.8h, v0.16b, v2.16b
+; CHECK-SVE-NEXT:    str q1, [x0]
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-GI-LABEL: smlsl_smlsl2_v8i16_uzp1:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldp q4, q2, [x1]
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    xtn v2.8b, v2.8h
+; CHECK-GI-NEXT:    xtn v4.8b, v4.8h
+; CHECK-GI-NEXT:    umull v2.8h, v3.8b, v2.8b
+; CHECK-GI-NEXT:    umlal v2.8h, v0.8b, v4.8b
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
 entry:
   %5 = load <8 x i16>, ptr %3, align 4
   %6 = trunc <8 x i16> %5 to <8 x i8>
@@ -2149,14 +2229,35 @@ entry:
 }
 
 define void @umlsl_umlsl2_v8i16_uzp1(<16 x i8> %0, <8 x i16> %1, ptr %2, ptr %3, i32 %4) {
-; CHECK-LABEL: umlsl_umlsl2_v8i16_uzp1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldp q2, q3, [x1]
-; CHECK-NEXT:    uzp1 v2.16b, v2.16b, v3.16b
-; CHECK-NEXT:    umlsl v1.8h, v0.8b, v2.8b
-; CHECK-NEXT:    umlsl2 v1.8h, v0.16b, v2.16b
-; CHECK-NEXT:    str q1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-NEON-LABEL: umlsl_umlsl2_v8i16_uzp1:
+; CHECK-NEON:       // %bb.0: // %entry
+; CHECK-NEON-NEXT:    ldp q2, q3, [x1]
+; CHECK-NEON-NEXT:    uzp1 v2.16b, v2.16b, v3.16b
+; CHECK-NEON-NEXT:    umlsl v1.8h, v0.8b, v2.8b
+; CHECK-NEON-NEXT:    umlsl2 v1.8h, v0.16b, v2.16b
+; CHECK-NEON-NEXT:    str q1, [x0]
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-SVE-LABEL: umlsl_umlsl2_v8i16_uzp1:
+; CHECK-SVE:       // %bb.0: // %entry
+; CHECK-SVE-NEXT:    ldp q2, q3, [x1]
+; CHECK-SVE-NEXT:    uzp1 v2.16b, v2.16b, v3.16b
+; CHECK-SVE-NEXT:    umlsl v1.8h, v0.8b, v2.8b
+; CHECK-SVE-NEXT:    umlsl2 v1.8h, v0.16b, v2.16b
+; CHECK-SVE-NEXT:    str q1, [x0]
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-GI-LABEL: umlsl_umlsl2_v8i16_uzp1:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldp q4, q2, [x1]
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    xtn v2.8b, v2.8h
+; CHECK-GI-NEXT:    xtn v4.8b, v4.8h
+; CHECK-GI-NEXT:    smull v2.8h, v3.8b, v2.8b
+; CHECK-GI-NEXT:    smlal v2.8h, v0.8b, v4.8b
+; CHECK-GI-NEXT:    sub v0.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
 entry:
   %5 = load <8 x i16>, ptr %3, align 4
   %6 = trunc <8 x i16> %5 to <8 x i8>
@@ -2174,14 +2275,35 @@ entry:
 }
 
 define void @smlsl_smlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3, i32 %4) {
-; CHECK-LABEL: smlsl_smlsl2_v4i32_uzp1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldp q2, q3, [x1]
-; CHECK-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
-; CHECK-NEXT:    smlsl v1.4s, v0.4h, v2.4h
-; CHECK-NEXT:    smlsl2 v1.4s, v0.8h, v2.8h
-; CHECK-NEXT:    str q1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-NEON-LABEL: smlsl_smlsl2_v4i32_uzp1:
+; CHECK-NEON:       // %bb.0: // %entry
+; CHECK-NEON-NEXT:    ldp q2, q3, [x1]
+; CHECK-NEON-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-NEON-NEXT:    smlsl v1.4s, v0.4h, v2.4h
+; CHECK-NEON-NEXT:    smlsl2 v1.4s, v0.8h, v2.8h
+; CHECK-NEON-NEXT:    str q1, [x0]
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-SVE-LABEL: smlsl_smlsl2_v4i32_uzp1:
+; CHECK-SVE:       // %bb.0: // %entry
+; CHECK-SVE-NEXT:    ldp q2, q3, [x1]
+; CHECK-SVE-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-SVE-NEXT:    smlsl v1.4s, v0.4h, v2.4h
+; CHECK-SVE-NEXT:    smlsl2 v1.4s, v0.8h, v2.8h
+; CHECK-SVE-NEXT:    str q1, [x0]
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-GI-LABEL: smlsl_smlsl2_v4i32_uzp1:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldp q4, q2, [x1]
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    xtn v2.4h, v2.4s
+; CHECK-GI-NEXT:    xtn v4.4h, v4.4s
+; CHECK-GI-NEXT:    umull v2.4s, v3.4h, v2.4h
+; CHECK-GI-NEXT:    umlal v2.4s, v0.4h, v4.4h
+; CHECK-GI-NEXT:    sub v0.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
 entry:
   %5 = load <4 x i32>, ptr %3, align 4
   %6 = trunc <4 x i32> %5 to <4 x i16>
@@ -2199,14 +2321,35 @@ entry:
 }
 
 define void @umlsl_umlsl2_v4i32_uzp1(<8 x i16> %0, <4 x i32> %1, ptr %2, ptr %3, i32 %4) {
-; CHECK-LABEL: umlsl_umlsl2_v4i32_uzp1:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldp q2, q3, [x1]
-; CHECK-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
-; CHECK-NEXT:    umlsl v1.4s, v0.4h, v2.4h
-; CHECK-NEXT:    umlsl2 v1.4s, v0.8h, v2.8h
-; CHECK-NEXT:    str q1, [x0]
-; CHECK-NEXT:    ret
+; CHECK-NEON-LABEL: umlsl_umlsl2_v4i32_uzp1:
+; CHECK-NEON:       // %bb.0: // %entry
+; CHECK-NEON-NEXT:    ldp q2, q3, [x1]
+; CHECK-NEON-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-NEON-NEXT:    umlsl v1.4s, v0.4h, v2.4h
+; CHECK-NEON-NEXT:    umlsl2 v1.4s, v0.8h, v2.8h
+; CHECK-NEON-NEXT:    str q1, [x0]
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-SVE-LABEL: umlsl_umlsl2_v4i32_uzp1:
+; CHECK-SVE:       // %bb.0: // %entry
+; CHECK-SVE-NEXT:    ldp q2, q3, [x1]
+; CHECK-SVE-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-SVE-NEXT:    umlsl v1.4s, v0.4h, v2.4h
+; CHECK-SVE-NEXT:    umlsl2 v1.4s, v0.8h, v2.8h
+; CHECK-SVE-NEXT:    str q1, [x0]
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-GI-LABEL: umlsl_umlsl2_v4i32_uzp1:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    ldp q4, q2, [x1]
+; CHECK-GI-NEXT:    mov d3, v0.d[1]
+; CHECK-GI-NEXT:    xtn v2.4h, v2.4s
+; CHECK-GI-NEXT:    xtn v4.4h, v4.4s
+; CHECK-GI-NEXT:    smull v2.4s, v3.4h, v2.4h
+; CHECK-GI-NEXT:    smlal v2.4s, v0.4h, v4.4h
+; CHECK-GI-NEXT:    sub v0.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT:    str q0, [x0]
+; CHECK-GI-NEXT:    ret
 entry:
   %5 = load <4 x i32>, ptr %3, align 4
   %6 = trunc <4 x i32> %5 to <4 x i16>
@@ -2224,13 +2367,31 @@ entry:
 }
 
 define <2 x i32> @do_stuff(<2 x i64> %0, <2 x i64> %1) {
-; CHECK-LABEL: do_stuff:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT:    smull2 v0.2d, v1.4s, v0.4s
-; CHECK-NEXT:    xtn v0.2s, v0.2d
-; CHECK-NEXT:    add v0.2s, v0.2s, v1.2s
-; CHECK-NEXT:    ret
+; CHECK-NEON-LABEL: do_stuff:
+; CHECK-NEON:       // %bb.0:
+; CHECK-NEON-NEXT:    uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-NEON-NEXT:    smull2 v0.2d, v1.4s, v0.4s
+; CHECK-NEON-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEON-NEXT:    add v0.2s, v0.2s, v1.2s
+; CHECK-NEON-NEXT:    ret
+;
+; CHECK-SVE-LABEL: do_stuff:
+; CHECK-SVE:       // %bb.0:
+; CHECK-SVE-NEXT:    uzp1 v0.4s, v0.4s, v0.4s
+; CHECK-SVE-NEXT:    smull2 v0.2d, v1.4s, v0.4s
+; CHECK-SVE-NEXT:    xtn v0.2s, v0.2d
+; CHECK-SVE-NEXT:    add v0.2s, v0.2s, v1.2s
+; CHECK-SVE-NEXT:    ret
+;
+; CHECK-GI-LABEL: do_stuff:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    ext v2.16b, v1.16b, v2.16b, #8
+; CHECK-GI-NEXT:    umull v0.2d, v2.2s, v0.2s
+; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
+; CHECK-GI-NEXT:    add v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT:    ret
   %bc.1 = bitcast <2 x i64> %1 to <4 x i32>
   %trunc.0 = trunc <2 x i64> %0 to <2 x i32>
   %shuff.hi = shufflevector <4 x i32> %bc.1, <4 x i32> zeroinitializer, <2 x i32> <i32 2, i32 3>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 1dfd977186b0e73..7af7c235f9ac16b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -3560,4 +3560,16 @@ entry:
   ret <4 x i16> %vrshrn_n1
 }
 
+define <8 x i16> @signbits_vashr(<8 x i16> %a)  {
+; CHECK-LABEL: signbits_vashr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshr.8h v0, v0, #8
+; CHECK-NEXT:    sshr.8h v0, v0, #9
+; CHECK-NEXT:    ret
+  %b = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>) ; sshl by -8 is checked as sshr #8
+  %c = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %b, <8 x i16> <i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9, i16 -9>) ; sshl by -9 is checked as sshr #9
+  %d = ashr <8 x i16> %c, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7> ; no instruction expected for this: after the #8 and #9 arithmetic right shifts every bit of each i16 lane already equals its sign bit
+  ret <8 x i16> %d
+}
+
 declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
index 049098ab2ae97d8..40684b0f3a256b0 100644
--- a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
@@ -131,7 +131,7 @@ define double @t1_strict(double %x) #0 {
 ; CHECK-NEXT:    ret
 entry:
   %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") #0
-  %conv1 = call double @llvm.experimental.constrained.sitofp.i64.f64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %conv1 = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %conv1
 }
 
@@ -143,7 +143,7 @@ define float @t2_strict(float %x) #0 {
 ; CHECK-NEXT:    ret
 entry:
   %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") #0
-  %conv1 = call float @llvm.experimental.constrained.sitofp.i32.f32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %conv1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %conv1
 }
 
@@ -155,7 +155,7 @@ define half @t3_strict(half %x) #0 {
 ; CHECK-NEXT:    ret
 entry:
   %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") #0
-  %conv1 = call half @llvm.experimental.constrained.sitofp.i32.f16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %conv1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret half %conv1
 }
 
@@ -167,7 +167,7 @@ define double @t4_strict(double %x) #0 {
 ; CHECK-NEXT:    ret
 entry:
   %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") #0
-  %conv1 = call double @llvm.experimental.constrained.uitofp.i64.f64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %conv1 = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret double %conv1
 }
 
@@ -179,7 +179,7 @@ define float @t5_strict(float %x) #0 {
 ; CHECK-NEXT:    ret
 entry:
   %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0
-  %conv1 = call float @llvm.experimental.constrained.uitofp.i32.f32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %conv1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret float %conv1
 }
 
@@ -191,7 +191,7 @@ define half @t6_strict(half %x) #0 {
 ; CHECK-NEXT:    ret
 entry:
   %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0
-  %conv1 = call half @llvm.experimental.constrained.uitofp.i32.f16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %conv1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret half %conv1
 }
 
@@ -216,7 +216,7 @@ define bfloat @t7_strict(bfloat %x) #0 {
 ; CHECK-NEXT:    ret
 entry:
   %conv = call i32 @llvm.experimental.constrained.fptosi.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0
-  %conv1 = call bfloat @llvm.experimental.constrained.sitofp.i32.bf16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %conv1 = call bfloat @llvm.experimental.constrained.sitofp.bf16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret bfloat %conv1
 }
 
@@ -241,7 +241,7 @@ define bfloat @t8_strict(bfloat %x) #0 {
 ; CHECK-NEXT:    ret
 entry:
   %conv = call i32 @llvm.experimental.constrained.fptoui.i32.bf16(bfloat %x, metadata !"fpexcept.strict") #0
-  %conv1 = call bfloat @llvm.experimental.constrained.uitofp.i32.bf16(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %conv1 = call bfloat @llvm.experimental.constrained.uitofp.bf16.i32(i32 %conv, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret bfloat %conv1
 }
 
@@ -255,11 +255,11 @@ declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata)
 declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata)
 declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata)
 declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata)
-declare bfloat @llvm.experimental.constrained.sitofp.i32.bf16(i32, metadata, metadata)
-declare bfloat @llvm.experimental.constrained.uitofp.i32.bf16(i32, metadata, metadata)
-declare half @llvm.experimental.constrained.sitofp.i32.f16(i32, metadata, metadata)
-declare half @llvm.experimental.constrained.uitofp.i32.f16(i32, metadata, metadata)
-declare float @llvm.experimental.constrained.sitofp.i32.f32(i32, metadata, metadata)
-declare float @llvm.experimental.constrained.uitofp.i32.f32(i32, metadata, metadata)
-declare double @llvm.experimental.constrained.sitofp.i64.f64(i64, metadata, metadata)
-declare double @llvm.experimental.constrained.uitofp.i64.f64(i64, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.sitofp.bf16.i32(i32, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.uitofp.bf16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata)
+declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
index 3aeefab52c6fa33..4cce06dce44c9ba 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-fp16.ll
@@ -595,7 +595,7 @@ define i32 @lrint_f16(half %x) #0 {
 ; CHECK-FP16-NEXT:    frintx h0, h0
 ; CHECK-FP16-NEXT:    fcvtzs w0, h0
 ; CHECK-FP16-NEXT:    ret
-  %val = call i32 @llvm.experimental.constrained.lrint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
@@ -612,7 +612,7 @@ define i64 @llrint_f16(half %x) #0 {
 ; CHECK-FP16-NEXT:    frintx h0, h0
 ; CHECK-FP16-NEXT:    fcvtzs x0, h0
 ; CHECK-FP16-NEXT:    ret
-  %val = call i64 @llvm.experimental.constrained.llrint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %val = call i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
@@ -693,7 +693,7 @@ define i32 @lround_f16(half %x) #0 {
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fcvtas w0, h0
 ; CHECK-FP16-NEXT:    ret
-  %val = call i32 @llvm.experimental.constrained.lround.f16(half %x, metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.lround.i32.f16(half %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
@@ -708,7 +708,7 @@ define i64 @llround_f16(half %x) #0 {
 ; CHECK-FP16:       // %bb.0:
 ; CHECK-FP16-NEXT:    fcvtas x0, h0
 ; CHECK-FP16-NEXT:    ret
-  %val = call i64 @llvm.experimental.constrained.llround.f16(half %x, metadata !"fpexcept.strict") #0
+  %val = call i64 @llvm.experimental.constrained.llround.i64.f16(half %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
@@ -1277,14 +1277,14 @@ declare half @llvm.experimental.constrained.exp.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.exp2.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata)
-declare i32 @llvm.experimental.constrained.lrint.f16(half, metadata, metadata)
-declare i64 @llvm.experimental.constrained.llrint.f16(half, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.i32.f16(half, metadata, metadata)
+declare i64 @llvm.experimental.constrained.llrint.i64.f16(half, metadata, metadata)
 declare half @llvm.experimental.constrained.maxnum.f16(half, half, metadata)
 declare half @llvm.experimental.constrained.minnum.f16(half, half, metadata)
 declare half @llvm.experimental.constrained.ceil.f16(half, metadata)
 declare half @llvm.experimental.constrained.floor.f16(half, metadata)
-declare i32 @llvm.experimental.constrained.lround.f16(half, metadata)
-declare i64 @llvm.experimental.constrained.llround.f16(half, metadata)
+declare i32 @llvm.experimental.constrained.lround.i32.f16(half, metadata)
+declare i64 @llvm.experimental.constrained.llround.i64.f16(half, metadata)
 declare half @llvm.experimental.constrained.round.f16(half, metadata)
 declare half @llvm.experimental.constrained.roundeven.f16(half, metadata)
 declare half @llvm.experimental.constrained.trunc.f16(half, metadata)
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
index 1a9ba9fd4a5180c..6147afba4e603ae 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll
@@ -279,7 +279,7 @@ define <4 x i1> @fcmps_v4f32(<4 x float> %x, <4 x float> %y) #0 {
 ; CHECK-NEXT:    xtn v0.4h, v4.4s
 ; CHECK-NEXT:    ret
 entry:
-  %val = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict")
+  %val = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict")
   ret <4 x i1> %val
 }
 
@@ -825,8 +825,8 @@ declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, meta
 declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata)
 declare <4 x float> @llvm.experimental.constrained.roundeven.v4f32(<4 x float>, metadata)
 declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata)
-declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x float>, <4 x float>, metadata, metadata)
-declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x float>, <4 x float>, metadata, metadata)
+declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float>, <4 x float>, metadata, metadata)
 
 declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
index 1664fa3ce56ae6b..fd3a0c3207606c2 100644
--- a/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/AArch64/fp-intrinsics.ll
@@ -262,7 +262,7 @@ define float @nearbyint_f32(float %x) #0 {
 ; CHECK: frintx [[REG:s[0-9]+]], s0
 ; CHECK: fcvtzs w0, [[REG]]
 define i32 @lrint_f32(float %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.lrint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
@@ -270,7 +270,7 @@ define i32 @lrint_f32(float %x) #0 {
 ; CHECK: frintx [[REG:s[0-9]+]], s0
 ; CHECK: fcvtzs x0, [[REG]]
 define i64 @llrint_f32(float %x) #0 {
-  %val = call i64 @llvm.experimental.constrained.llrint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %val = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
@@ -319,14 +319,14 @@ define float @floor_f32(float %x) #0 {
 ; CHECK-LABEL: lround_f32:
 ; CHECK: fcvtas w0, s0
 define i32 @lround_f32(float %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.lround.f32(float %x, metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.lround.i32.f32(float %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
 ; CHECK-LABEL: llround_f32:
 ; CHECK: fcvtas x0, s0
 define i64 @llround_f32(float %x) #0 {
-  %val = call i64 @llvm.experimental.constrained.llround.f32(float %x, metadata !"fpexcept.strict") #0
+  %val = call i64 @llvm.experimental.constrained.llround.i64.f32(float %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
@@ -802,7 +802,7 @@ define double @nearbyint_f64(double %x) #0 {
 ; CHECK: frintx [[REG:d[0-9]+]], d0
 ; CHECK: fcvtzs w0, [[REG]]
 define i32 @lrint_f64(double %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.lrint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
@@ -810,7 +810,7 @@ define i32 @lrint_f64(double %x) #0 {
 ; CHECK: frintx [[REG:d[0-9]+]], d0
 ; CHECK: fcvtzs x0, [[REG]]
 define i64 @llrint_f64(double %x) #0 {
-  %val = call i64 @llvm.experimental.constrained.llrint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %val = call i64 @llvm.experimental.constrained.llrint.i64.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
@@ -859,14 +859,14 @@ define double @floor_f64(double %x) #0 {
 ; CHECK-LABEL: lround_f64:
 ; CHECK: fcvtas w0, d0
 define i32 @lround_f64(double %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.lround.f64(double %x, metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
 ; CHECK-LABEL: llround_f64:
 ; CHECK: fcvtas x0, d0
 define i64 @llround_f64(double %x) #0 {
-  %val = call i64 @llvm.experimental.constrained.llround.f64(double %x, metadata !"fpexcept.strict") #0
+  %val = call i64 @llvm.experimental.constrained.llround.i64.f64(double %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
@@ -1341,14 +1341,14 @@ define fp128 @nearbyint_f128(fp128 %x) #0 {
 ; CHECK-LABEL: lrint_f128:
 ; CHECK: bl lrintl
 define i32 @lrint_f128(fp128 %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.lrint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.lrint.i32.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
 ; CHECK-LABEL: llrint_f128:
 ; CHECK: bl llrintl
 define i64 @llrint_f128(fp128 %x) #0 {
-  %val = call i64 @llvm.experimental.constrained.llrint.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %val = call i64 @llvm.experimental.constrained.llrint.i64.f128(fp128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
@@ -1383,14 +1383,14 @@ define fp128 @floor_f128(fp128 %x) #0 {
 ; CHECK-LABEL: lround_f128:
 ; CHECK: bl lroundl
 define i32 @lround_f128(fp128 %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.lround.f128(fp128 %x, metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.lround.i32.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
 ; CHECK-LABEL: llround_f128:
 ; CHECK: bl llroundl
 define i64 @llround_f128(fp128 %x) #0 {
-  %val = call i64 @llvm.experimental.constrained.llround.f128(fp128 %x, metadata !"fpexcept.strict") #0
+  %val = call i64 @llvm.experimental.constrained.llround.i64.f128(fp128 %x, metadata !"fpexcept.strict") #0
   ret i64 %val
 }
 
@@ -1795,16 +1795,16 @@ declare float @llvm.experimental.constrained.exp.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.exp2.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
-declare i32 @llvm.experimental.constrained.lrint.f32(float, metadata, metadata)
-declare i64 @llvm.experimental.constrained.llrint.f32(float, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.i32.f32(float, metadata, metadata)
+declare i64 @llvm.experimental.constrained.llrint.i64.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.maxnum.f32(float, float, metadata)
 declare float @llvm.experimental.constrained.minnum.f32(float, float, metadata)
 declare float @llvm.experimental.constrained.maximum.f32(float, float, metadata)
 declare float @llvm.experimental.constrained.minimum.f32(float, float, metadata)
 declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
 declare float @llvm.experimental.constrained.floor.f32(float, metadata)
-declare i32 @llvm.experimental.constrained.lround.f32(float, metadata)
-declare i64 @llvm.experimental.constrained.llround.f32(float, metadata)
+declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
+declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
 declare float @llvm.experimental.constrained.round.f32(float, metadata)
 declare float @llvm.experimental.constrained.roundeven.f32(float, metadata)
 declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
@@ -1847,16 +1847,16 @@ declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata
 declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
-declare i32 @llvm.experimental.constrained.lrint.f64(double, metadata, metadata)
-declare i64 @llvm.experimental.constrained.llrint.f64(double, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.i32.f64(double, metadata, metadata)
+declare i64 @llvm.experimental.constrained.llrint.i64.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.maxnum.f64(double, double, metadata)
 declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata)
 declare double @llvm.experimental.constrained.maximum.f64(double, double, metadata)
 declare double @llvm.experimental.constrained.minimum.f64(double, double, metadata)
 declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
 declare double @llvm.experimental.constrained.floor.f64(double, metadata)
-declare i32 @llvm.experimental.constrained.lround.f64(double, metadata)
-declare i64 @llvm.experimental.constrained.llround.f64(double, metadata)
+declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata)
+declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
 declare double @llvm.experimental.constrained.round.f64(double, metadata)
 declare double @llvm.experimental.constrained.roundeven.f64(double, metadata)
 declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
@@ -1899,14 +1899,14 @@ declare fp128 @llvm.experimental.constrained.exp.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.exp2.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
-declare i32 @llvm.experimental.constrained.lrint.f128(fp128, metadata, metadata)
-declare i64 @llvm.experimental.constrained.llrint.f128(fp128, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.i32.f128(fp128, metadata, metadata)
+declare i64 @llvm.experimental.constrained.llrint.i64.f128(fp128, metadata, metadata)
 declare fp128 @llvm.experimental.constrained.maxnum.f128(fp128, fp128, metadata)
 declare fp128 @llvm.experimental.constrained.minnum.f128(fp128, fp128, metadata)
 declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata)
 declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata)
-declare i32 @llvm.experimental.constrained.lround.f128(fp128, metadata)
-declare i64 @llvm.experimental.constrained.llround.f128(fp128, metadata)
+declare i32 @llvm.experimental.constrained.lround.i32.f128(fp128, metadata)
+declare i64 @llvm.experimental.constrained.llround.i64.f128(fp128, metadata)
 declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata)
 declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata)
 declare i1 @llvm.experimental.constrained.fcmps.f128(fp128, fp128, metadata, metadata)
diff --git a/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir b/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir
index 0963ecbb123115d..a2532a854923f53 100644
--- a/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir
+++ b/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir
@@ -1,4 +1,5 @@
 # RUN: llc -run-pass=tailduplication -tail-dup-size=4 %s -o - | FileCheck %s
+# RUN: llc -passes=tailduplication -tail-dup-size=4 %s -o - | FileCheck %s
 
 # JumpTableDest32 uses an `adr` to a temporary label (itself). If duplicated we
 # cannot guarantee reachability for any uses after the first.
diff --git a/llvm/test/CodeGen/AArch64/llvm.frexp.ll b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
new file mode 100644
index 000000000000000..e4cb8ed6eaf90f1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm.frexp.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+
+define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
+; CHECK-LABEL: test_frexp_v2f16_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h1, v0.h[1]
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    add x0, sp, #36
+; CHECK-NEXT:    add x19, sp, #36
+; CHECK-NEXT:    fcvt s0, h1
+; CHECK-NEXT:    bl frexpf
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    add x0, sp, #32
+; CHECK-NEXT:    fcvt s1, h1
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    fmov s0, s1
+; CHECK-NEXT:    bl frexpf
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    fcvt h2, s0
+; CHECK-NEXT:    add x0, sp, #40
+; CHECK-NEXT:    mov h1, v1.h[2]
+; CHECK-NEXT:    fcvt s0, h1
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v2.h[1], v1.h[0]
+; CHECK-NEXT:    str q2, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl frexpf
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    fcvt h2, s0
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    mov h1, v1.h[3]
+; CHECK-NEXT:    fcvt s0, h1
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v1.h[2], v2.h[0]
+; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl frexpf
+; CHECK-NEXT:    fcvt h2, s0
+; CHECK-NEXT:    ldr s1, [sp, #32]
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    ld1 { v1.s }[1], [x19]
+; CHECK-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    mov v0.h[3], v2.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
+  %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
+  ret { <2 x half>, <2 x i32> } %result
+}
+
+define { <3 x float>, <3 x i32> } @test_frexp_v3f16_v3i32(<3 x float> %a) {
+; CHECK-LABEL: test_frexp_v3f16_v3i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    add x0, sp, #56
+; CHECK-NEXT:    add x19, sp, #56
+; CHECK-NEXT:    bl frexpf
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    bl frexpf
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    add x0, sp, #60
+; CHECK-NEXT:    add x20, sp, #60
+; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov s0, v0.s[2]
+; CHECK-NEXT:    bl frexpf
+; CHECK-NEXT:    ldr s1, [sp, #44]
+; CHECK-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    ld1 { v1.s }[1], [x19]
+; CHECK-NEXT:    mov v2.s[2], v0.s[0]
+; CHECK-NEXT:    ld1 { v1.s }[2], [x20]
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
+  %result = call { <3 x float>, <3 x i32> } @llvm.frexp.v3f32.v3i32(<3 x float> %a)
+  ret { <3 x float>, <3 x i32> } %result
+}
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll b/llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll
new file mode 100644
index 000000000000000..456b7f98974a9e1
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos-fmf.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=aarch64-gnu-linux -debug-only=isel -o /dev/null < %s 2>&1 | FileCheck %s
+
+; REQUIRES: asserts
+
+define { float, float } @test_sincos_f32_afn(float %a) {
+; CHECK-LABEL: Initial selection DAG: %bb.0 'test_sincos_f32_afn:'
+; CHECK-NEXT:  SelectionDAG has 9 nodes:
+; CHECK-NEXT:    t0: ch,glue = EntryToken
+; CHECK-NEXT:      t2: f32,ch = CopyFromReg t0, Register:f32 %0
+; CHECK-NEXT:    t3: f32,f32 = fsincos afn t2
+; CHECK-NEXT:    t5: ch,glue = CopyToReg t0, Register:f32 $s0, t3
+; CHECK-NEXT:    t7: ch,glue = CopyToReg t5, Register:f32 $s1, t3:1, t5:1
+; CHECK-NEXT:    t8: ch = AArch64ISD::RET_GLUE t7, Register:f32 $s0, Register:f32 $s1, t7:1
+  %result = call afn { float, float } @llvm.sincos.f32(float %a)
+  ret { float, float } %result
+}
diff --git a/llvm/test/CodeGen/AArch64/llvm.sincos.ll b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
new file mode 100644
index 000000000000000..c5efc796e7a3c45
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/llvm.sincos.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+; RUN: llc -mtriple=aarch64-none-linux < %s | FileCheck -check-prefixes=NO-LIBCALL %s
+
+define { half, half } @test_sincos_f16(half %a) {
+; CHECK-LABEL: test_sincos_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldp s1, s0, [sp, #8]
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    fcvt h1, s1
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f16:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 32
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -24
+; NO-LIBCALL-NEXT:    .cfi_offset b9, -32
+; NO-LIBCALL-NEXT:    fcvt s8, h0
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    fcvt h9, s0
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fmov s1, s0
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    fmov s0, s9
+; NO-LIBCALL-NEXT:    fcvt h1, s1
+; NO-LIBCALL-NEXT:    ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ret
+  %result = call { half, half } @llvm.sincos.f16(half %a)
+  ret { half, half } %result
+}
+
+define half @test_sincos_f16_only_use_sin(half %a) {
+; CHECK-LABEL: test_sincos_f16_only_use_sin:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr s0, [sp, #12]
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f16_only_use_sin:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 16
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    fcvt s0, h0
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    ret
+  %result = call { half, half } @llvm.sincos.f16(half %a)
+  %result.0 = extractvalue { half, half } %result, 0
+  ret half %result.0
+}
+
+define half @test_sincos_f16_only_use_cos(half %a) {
+; CHECK-LABEL: test_sincos_f16_only_use_cos:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr s0, [sp, #8]
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f16_only_use_cos:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 16
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    fcvt s0, h0
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    ret
+  %result = call { half, half } @llvm.sincos.f16(half %a)
+  %result.1 = extractvalue { half, half } %result, 1
+  ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_sincos_v2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h1, v0.h[1]
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    add x0, sp, #36
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    fcvt s0, h1
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    mov h0, v0.h[2]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #60
+; CHECK-NEXT:    add x1, sp, #56
+; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldp s2, s0, [sp, #32]
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp s3, s1, [sp, #24]
+; CHECK-NEXT:    fcvt h4, s0
+; CHECK-NEXT:    fcvt h2, s2
+; CHECK-NEXT:    fcvt h0, s1
+; CHECK-NEXT:    fcvt h1, s3
+; CHECK-NEXT:    ldp s5, s3, [sp, #40]
+; CHECK-NEXT:    fcvt h3, s3
+; CHECK-NEXT:    mov v0.h[1], v4.h[0]
+; CHECK-NEXT:    fcvt h4, s5
+; CHECK-NEXT:    mov v1.h[1], v2.h[0]
+; CHECK-NEXT:    ldp s5, s2, [sp, #56]
+; CHECK-NEXT:    mov v0.h[2], v3.h[0]
+; CHECK-NEXT:    fcvt h2, s2
+; CHECK-NEXT:    fcvt h3, s5
+; CHECK-NEXT:    mov v1.h[2], v4.h[0]
+; CHECK-NEXT:    mov v0.h[3], v2.h[0]
+; CHECK-NEXT:    mov v1.h[3], v3.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f16:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #80
+; NO-LIBCALL-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 80
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -24
+; NO-LIBCALL-NEXT:    .cfi_offset b9, -32
+; NO-LIBCALL-NEXT:    .cfi_offset b10, -40
+; NO-LIBCALL-NEXT:    .cfi_offset b11, -48
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    mov h1, v0.h[1]
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fcvt s8, h1
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    fcvt s9, h1
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s9
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    mov h1, v1.h[2]
+; NO-LIBCALL-NEXT:    fcvt s10, h1
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v0.h[1], v1.h[0]
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s10
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    mov h1, v1.h[3]
+; NO-LIBCALL-NEXT:    fcvt s11, h1
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.h[2], v0.h[0]
+; NO-LIBCALL-NEXT:    fmov s0, s11
+; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.h[3], v0.h[0]
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s9
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v0.h[1], v1.h[0]
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s10
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.h[2], v0.h[0]
+; NO-LIBCALL-NEXT:    fmov s0, s11
+; NO-LIBCALL-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fmov s1, s0
+; NO-LIBCALL-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    fcvt h2, s1
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.h[3], v2.h[0]
+; NO-LIBCALL-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; NO-LIBCALL-NEXT:    add sp, sp, #80
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+  ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_sincos_f32(float %a) {
+; CHECK-LABEL: test_sincos_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldp s1, s0, [sp, #8]
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f32:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 32
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -24
+; NO-LIBCALL-NEXT:    .cfi_offset b9, -32
+; NO-LIBCALL-NEXT:    fmov s8, s0
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    fmov s9, s0
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    fmov s1, s0
+; NO-LIBCALL-NEXT:    fmov s0, s9
+; NO-LIBCALL-NEXT:    ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ret
+  %result = call { float, float } @llvm.sincos.f32(float %a)
+  ret { float, float } %result
+}
+
+define { <3 x float>, <3 x float> } @test_sincos_v3f32(<3 x float> %a) {
+; CHECK-LABEL: test_sincos_v3f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w21, -24
+; CHECK-NEXT:    .cfi_offset w22, -32
+; CHECK-NEXT:    .cfi_offset w30, -48
+; CHECK-NEXT:    add x0, sp, #20
+; CHECK-NEXT:    add x1, sp, #16
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    add x19, sp, #28
+; CHECK-NEXT:    add x20, sp, #24
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    add x21, sp, #44
+; CHECK-NEXT:    add x22, sp, #40
+; CHECK-NEXT:    mov s0, v0.s[2]
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldp s1, s0, [sp, #16]
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    ld1 { v0.s }[1], [x19]
+; CHECK-NEXT:    ld1 { v1.s }[1], [x20]
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    ld1 { v0.s }[2], [x21]
+; CHECK-NEXT:    ld1 { v1.s }[2], [x22]
+; CHECK-NEXT:    ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v3f32:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #80
+; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 80
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -24
+; NO-LIBCALL-NEXT:    .cfi_offset b9, -32
+; NO-LIBCALL-NEXT:    mov s8, v0.s[1]
+; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov s9, v0.s[2]
+; NO-LIBCALL-NEXT:    fmov s0, s9
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    mov v1.s[2], v0.s[0]
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s9
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fmov s2, s0
+; NO-LIBCALL-NEXT:    ldp q0, q1, [sp, #16] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.s[2], v2.s[0]
+; NO-LIBCALL-NEXT:    add sp, sp, #80
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <3 x float>, <3 x float> } @llvm.sincos.v3f32(<3 x float> %a)
+  ret { <3 x float>, <3 x float> } %result
+}
+
+define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_sincos_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    add x19, sp, #28
+; CHECK-NEXT:    add x20, sp, #24
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldp s1, s0, [sp, #40]
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    ld1 { v0.s }[1], [x19]
+; CHECK-NEXT:    ld1 { v1.s }[1], [x20]
+; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f32:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #64
+; NO-LIBCALL-NEXT:    str d8, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #56] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 64
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -8
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -16
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    mov s8, v0.s[1]
+; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fmov s1, s0
+; NO-LIBCALL-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr d8, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    mov v1.s[1], v2.s[0]
+; NO-LIBCALL-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; NO-LIBCALL-NEXT:    add sp, sp, #64
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+  ret { <2 x float>, <2 x float> } %result
+}
+
+define { double, double } @test_sincos_f64(double %a) {
+; CHECK-LABEL: test_sincos_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #24
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldr d0, [sp, #24]
+; CHECK-NEXT:    ldr d1, [sp, #8]
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f64:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 32
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -24
+; NO-LIBCALL-NEXT:    .cfi_offset b9, -32
+; NO-LIBCALL-NEXT:    fmov d8, d0
+; NO-LIBCALL-NEXT:    bl sin
+; NO-LIBCALL-NEXT:    fmov d9, d0
+; NO-LIBCALL-NEXT:    fmov d0, d8
+; NO-LIBCALL-NEXT:    bl cos
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    fmov d1, d0
+; NO-LIBCALL-NEXT:    fmov d0, d9
+; NO-LIBCALL-NEXT:    ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ret
+  %result = call { double, double } @llvm.sincos.f64(double %a)
+  ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_sincos_v2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    add x0, sp, #56
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #32
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    add x19, sp, #32
+; CHECK-NEXT:    add x20, sp, #24
+; CHECK-NEXT:    mov d0, v0.d[1]
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldr d0, [sp, #56]
+; CHECK-NEXT:    ldr d1, [sp, #40]
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    ld1 { v0.d }[1], [x19]
+; CHECK-NEXT:    ld1 { v1.d }[1], [x20]
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f64:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #64
+; NO-LIBCALL-NEXT:    str d8, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #56] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 64
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -8
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -16
+; NO-LIBCALL-NEXT:    mov d8, v0.d[1]
+; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov d0, d8
+; NO-LIBCALL-NEXT:    bl sin
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    bl sin
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    mov v0.d[1], v1.d[0]
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov d0, d8
+; NO-LIBCALL-NEXT:    bl cos
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    bl cos
+; NO-LIBCALL-NEXT:    fmov d1, d0
+; NO-LIBCALL-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr d8, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.d[1], v2.d[0]
+; NO-LIBCALL-NEXT:    add sp, sp, #64
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+  ret { <2 x double>, <2 x double> } %result
+}
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll
index c64b3842aa5baa5..4bbbe40176313a0 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-cfi.ll
@@ -11,8 +11,7 @@ define void @a() "sign-return-address"="all" "sign-return-address-key"="b_key" {
 ; CHECK-NEXT:          .cfi_b_key_frame
 ; V8A-NEXT:            hint #27
 ; V83A-NEXT:           pacibsp
-; CHECK:               .cfi_negate_ra_state
-; CHECK-NEXT:          .cfi_def_cfa_offset
+; CHECK-NEXT:          .cfi_negate_ra_state
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll
index 3221815da33c5e1..6a11bef08c7406b 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-diff-scope-same-key.ll
@@ -7,8 +7,7 @@ define void @a() "sign-return-address"="all" {
 ; CHECK-LABEL:      a:                                     // @a
 ; V8A:              hint #25
 ; V83A:             paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
+; CHECK-NEXT:      .cfi_negate_ra_state
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
@@ -55,8 +54,7 @@ define void @c() "sign-return-address"="all" {
 ; CHECK-LABEL:         c:              // @c
 ; V8A:                 hint #25
 ; V83A:                paciasp
-; CHECK:              .cfi_negate_ra_state
-; CHECK-NEXT:         .cfi_def_cfa_offset
+; CHECK-NEXT:         .cfi_negate_ra_state
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll
index d43b74b9451aae1..1e7224683c6c895 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-non-leaf.ll
@@ -1,15 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple aarch64 %s -o - | \
 ; RUN:   FileCheck %s --check-prefixes CHECK,V8A
 ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple aarch64 -mattr=+v8.3a %s -o - | \
 ; RUN:   FileCheck %s --check-prefixes CHECK,V83A
 
 define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" {
-; CHECK-LABEL:      a:                                     // @a
-; CHECK:                .cfi_b_key_frame
-; V8A-NEXT:             hint #27
-; V83A-NEXT:            pacibsp
-; CHECK:                .cfi_negate_ra_state
-; CHECK-NEXT:           .cfi_def_cfa_offset
+; V8A-LABEL: a:
+; V8A:       // %bb.0:
+; V8A-NEXT:    .cfi_b_key_frame
+; V8A-NEXT:    hint #27
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    sub sp, sp, #32
+; V8A-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 32
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    bl OUTLINED_FUNCTION_0
+; V8A-NEXT:    //APP
+; V8A-NEXT:    mov x30, x0
+; V8A-NEXT:    //NO_APP
+; V8A-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; V8A-NEXT:    add sp, sp, #32
+; V8A-NEXT:    hint #31
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: a:
+; V83A:       // %bb.0:
+; V83A-NEXT:    .cfi_b_key_frame
+; V83A-NEXT:    pacibsp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 32
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    bl OUTLINED_FUNCTION_0
+; V83A-NEXT:    //APP
+; V83A-NEXT:    mov x30, x0
+; V83A-NEXT:    //NO_APP
+; V83A-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    retab
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
@@ -27,12 +56,40 @@ define i64 @a(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"
 }
 
 define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" {
-; CHECK-LABEL:      b:                                     // @b
-; CHECK:                .cfi_b_key_frame
-; V8A-NEXT:             hint #27
-; V83A-NEXT:            pacibsp
-; CHECK:                .cfi_negate_ra_state
-; CHECK-NEXT:           .cfi_def_cfa_offset
+; V8A-LABEL: b:
+; V8A:       // %bb.0:
+; V8A-NEXT:    .cfi_b_key_frame
+; V8A-NEXT:    hint #27
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    sub sp, sp, #32
+; V8A-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 32
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    bl OUTLINED_FUNCTION_0
+; V8A-NEXT:    //APP
+; V8A-NEXT:    mov x30, x0
+; V8A-NEXT:    //NO_APP
+; V8A-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; V8A-NEXT:    add sp, sp, #32
+; V8A-NEXT:    hint #31
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: b:
+; V83A:       // %bb.0:
+; V83A-NEXT:    .cfi_b_key_frame
+; V83A-NEXT:    pacibsp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 32
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    bl OUTLINED_FUNCTION_0
+; V83A-NEXT:    //APP
+; V83A-NEXT:    mov x30, x0
+; V83A-NEXT:    //NO_APP
+; V83A-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    retab
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
@@ -50,12 +107,40 @@ define i64 @b(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"
 }
 
 define i64 @c(i64 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" {
-; CHECK-LABEL:      c:                                     // @c
-; CHECK:                .cfi_b_key_frame
-; V8A-NEXT:             hint #27
-; V83A-NEXT:            pacibsp
-; CHECK:                .cfi_negate_ra_state
-; CHECK-NEXT:           .cfi_def_cfa_offset
+; V8A-LABEL: c:
+; V8A:       // %bb.0:
+; V8A-NEXT:    .cfi_b_key_frame
+; V8A-NEXT:    hint #27
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    sub sp, sp, #32
+; V8A-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 32
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    bl OUTLINED_FUNCTION_0
+; V8A-NEXT:    //APP
+; V8A-NEXT:    mov x30, x0
+; V8A-NEXT:    //NO_APP
+; V8A-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; V8A-NEXT:    add sp, sp, #32
+; V8A-NEXT:    hint #31
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: c:
+; V83A:       // %bb.0:
+; V83A-NEXT:    .cfi_b_key_frame
+; V83A-NEXT:    pacibsp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 32
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    bl OUTLINED_FUNCTION_0
+; V83A-NEXT:    //APP
+; V83A-NEXT:    mov x30, x0
+; V83A-NEXT:    //NO_APP
+; V83A-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    retab
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir
index ba27d1c681e3f4d..9a983cbd6714ee0 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-regsave.mir
@@ -82,8 +82,7 @@ body:             |
 # CHECK:          bb.0:
 # CHECK:            frame-setup EMITBKEY
 # CHECK-NEXT:       frame-setup PACIBSP implicit-def $lr, implicit $lr, implicit $sp
-# CHECK:            frame-setup CFI_INSTRUCTION negate_ra_sign_state
-# CHECK-NEXT:       frame-setup CFI_INSTRUCTION
+# CHECK-NEXT:       frame-setup CFI_INSTRUCTION negate_ra_sign_state
 # CHECK-NOT:        OUTLINED_FUNCTION_
 # CHECK:          bb.1:
 # CHECK-NOT:        OUTLINED_FUNCTION_
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll
index 8c36ab4d8f403a1..87771f5de4f699a 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-same-scope-diff-key.ll
@@ -1,14 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple aarch64 %s -o - | \
 ; RUN:   FileCheck %s --check-prefixes CHECK,V8A
 ; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple aarch64 -mattr=+v8.3a %s -o - | \
 ; RUN:   FileCheck %s --check-prefixes CHECK,V83A
 
 define void @a() "sign-return-address"="all" {
-; CHECK-LABEL:      a:                                     // @a
-; V8A:              hint #25
-; V83A:             paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
+; V8A-LABEL: a:
+; V8A:       // %bb.0:
+; V8A-NEXT:    hint #25
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    sub sp, sp, #32
+; V8A-NEXT:    .cfi_def_cfa_offset 32
+; V8A-NEXT:    mov w8, #1 // =0x1
+; V8A-NEXT:    mov w9, #2 // =0x2
+; V8A-NEXT:    stp w9, w8, [sp, #24]
+; V8A-NEXT:    mov w9, #3 // =0x3
+; V8A-NEXT:    mov w8, #4 // =0x4
+; V8A-NEXT:    stp w8, w9, [sp, #16]
+; V8A-NEXT:    mov w9, #5 // =0x5
+; V8A-NEXT:    mov w8, #6 // =0x6
+; V8A-NEXT:    stp w8, w9, [sp, #8]
+; V8A-NEXT:    add sp, sp, #32
+; V8A-NEXT:    hint #29
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: a:
+; V83A:       // %bb.0:
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    .cfi_def_cfa_offset 32
+; V83A-NEXT:    mov w8, #1 // =0x1
+; V83A-NEXT:    mov w9, #2 // =0x2
+; V83A-NEXT:    stp w9, w8, [sp, #24]
+; V83A-NEXT:    mov w9, #3 // =0x3
+; V83A-NEXT:    mov w8, #4 // =0x4
+; V83A-NEXT:    stp w8, w9, [sp, #16]
+; V83A-NEXT:    mov w9, #5 // =0x5
+; V83A-NEXT:    mov w8, #6 // =0x6
+; V83A-NEXT:    stp w8, w9, [sp, #8]
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    retaa
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
@@ -21,19 +53,48 @@ define void @a() "sign-return-address"="all" {
   store i32 4, ptr %4, align 4
   store i32 5, ptr %5, align 4
   store i32 6, ptr %6, align 4
-; V8A:            hint #29
-; V83A:           retaa
   ret void
-; CHECK:          .cfi_endproc
 }
 
 define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" {
-; CHECK-LABEL:      b:                                     // @b
-; CHECK:            .cfi_b_key_frame
-; V8A-NEXT:         hint #27
-; V83A-NEXT:        pacibsp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
+; V8A-LABEL: b:
+; V8A:       // %bb.0:
+; V8A-NEXT:    .cfi_b_key_frame
+; V8A-NEXT:    hint #27
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    sub sp, sp, #32
+; V8A-NEXT:    .cfi_def_cfa_offset 32
+; V8A-NEXT:    mov w8, #1 // =0x1
+; V8A-NEXT:    mov w9, #2 // =0x2
+; V8A-NEXT:    stp w9, w8, [sp, #24]
+; V8A-NEXT:    mov w9, #3 // =0x3
+; V8A-NEXT:    mov w8, #4 // =0x4
+; V8A-NEXT:    stp w8, w9, [sp, #16]
+; V8A-NEXT:    mov w9, #5 // =0x5
+; V8A-NEXT:    mov w8, #6 // =0x6
+; V8A-NEXT:    stp w8, w9, [sp, #8]
+; V8A-NEXT:    add sp, sp, #32
+; V8A-NEXT:    hint #31
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: b:
+; V83A:       // %bb.0:
+; V83A-NEXT:    .cfi_b_key_frame
+; V83A-NEXT:    pacibsp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    .cfi_def_cfa_offset 32
+; V83A-NEXT:    mov w8, #1 // =0x1
+; V83A-NEXT:    mov w9, #2 // =0x2
+; V83A-NEXT:    stp w9, w8, [sp, #24]
+; V83A-NEXT:    mov w9, #3 // =0x3
+; V83A-NEXT:    mov w8, #4 // =0x4
+; V83A-NEXT:    stp w8, w9, [sp, #16]
+; V83A-NEXT:    mov w9, #5 // =0x5
+; V83A-NEXT:    mov w8, #6 // =0x6
+; V83A-NEXT:    stp w8, w9, [sp, #8]
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    retab
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
@@ -46,19 +107,46 @@ define void @b() "sign-return-address"="all" "sign-return-address-key"="b_key" {
   store i32 4, ptr %4, align 4
   store i32 5, ptr %5, align 4
   store i32 6, ptr %6, align 4
-; V8A-NOT:          hint #29
-; V83A-NOT:         autiasp
-; V83A-NOT:         retaa
   ret void
-; CHECK:            .cfi_endproc
 }
 
 define void @c() "sign-return-address"="all" {
-; CHECK-LABEL:      c:                                     // @c
-; V8A:              hint #25
-; V83A:             paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
+; V8A-LABEL: c:
+; V8A:       // %bb.0:
+; V8A-NEXT:    hint #25
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    sub sp, sp, #32
+; V8A-NEXT:    .cfi_def_cfa_offset 32
+; V8A-NEXT:    mov w8, #1 // =0x1
+; V8A-NEXT:    mov w9, #2 // =0x2
+; V8A-NEXT:    stp w9, w8, [sp, #24]
+; V8A-NEXT:    mov w9, #3 // =0x3
+; V8A-NEXT:    mov w8, #4 // =0x4
+; V8A-NEXT:    stp w8, w9, [sp, #16]
+; V8A-NEXT:    mov w9, #5 // =0x5
+; V8A-NEXT:    mov w8, #6 // =0x6
+; V8A-NEXT:    stp w8, w9, [sp, #8]
+; V8A-NEXT:    add sp, sp, #32
+; V8A-NEXT:    hint #29
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: c:
+; V83A:       // %bb.0:
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    sub sp, sp, #32
+; V83A-NEXT:    .cfi_def_cfa_offset 32
+; V83A-NEXT:    mov w8, #1 // =0x1
+; V83A-NEXT:    mov w9, #2 // =0x2
+; V83A-NEXT:    stp w9, w8, [sp, #24]
+; V83A-NEXT:    mov w9, #3 // =0x3
+; V83A-NEXT:    mov w8, #4 // =0x4
+; V83A-NEXT:    stp w8, w9, [sp, #16]
+; V83A-NEXT:    mov w9, #5 // =0x5
+; V83A-NEXT:    mov w8, #6 // =0x6
+; V83A-NEXT:    stp w8, w9, [sp, #8]
+; V83A-NEXT:    add sp, sp, #32
+; V83A-NEXT:    retaa
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
@@ -71,11 +159,10 @@ define void @c() "sign-return-address"="all" {
   store i32 4, ptr %4, align 4
   store i32 5, ptr %5, align 4
   store i32 6, ptr %6, align 4
-; V8A:            hint #29
-; V83A:           retaa
   ret void
-; CHECK:          .cfi_endproc
 }
 
 ; CHECK-NOT:      OUTLINED_FUNCTION_0:
 ; CHECK-NOT:      // -- Begin function
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll
index d5ef94e900993c8..a7ea32952f3b78e 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-subtarget.ll
@@ -10,8 +10,7 @@ define void @a() #0 {
 ; CHECK:            // %bb.0:
 ; CHECK-NEXT:               .cfi_b_key_frame
 ; CHECK-NEXT:               pacibsp
-; CHECK:                    .cfi_negate_ra_state
-; CHECK-NEXT:               .cfi_def_cfa_offset
+; CHECK-NEXT:               .cfi_negate_ra_state
 ; CHECK-NOT:                OUTLINED_FUNCTION_
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
@@ -35,8 +34,7 @@ define void @b() #0 {
 ; CHECK:            // %bb.0:
 ; CHECK-NEXT:               .cfi_b_key_frame
 ; CHECK-NEXT:               pacibsp
-; CHECK:                    .cfi_negate_ra_state
-; CHECK-NEXT:               .cfi_def_cfa_offset
+; CHECK-NEXT:               .cfi_negate_ra_state
 ; CHECK-NOT:                OUTLINED_FUNCTION_
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
@@ -60,8 +58,7 @@ define void @c() #1 {
 ; CHECK:            // %bb.0:
 ; CHECK-NEXT:               .cfi_b_key_frame
 ; CHECK-NEXT:               hint #27
-; CHECK:                    .cfi_negate_ra_state
-; CHECK-NEXT:               .cfi_def_cfa_offset
+; CHECK-NEXT:               .cfi_negate_ra_state
 ; CHECK-NOT:                OUTLINED_FUNCTION_
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
index 3e361111b545532..da68ea5bf0dbcb9 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple aarch64-arm-linux-gnu --enable-machine-outliner -outliner-leaf-descendants=false \
 ; RUN:   -verify-machineinstrs %s -o - | FileCheck --check-prefixes CHECK,V8A %s
 ; RUN: llc -mtriple aarch64 -enable-machine-outliner -outliner-leaf-descendants=false \
@@ -7,15 +8,38 @@
 declare i32 @thunk_called_fn(i32, i32, i32, i32)
 
 define i32 @a() #0 {
-; CHECK-LABEL:  a:                                      // @a
-; CHECK:        // %bb.0:                               // %entry
-; V8A-NEXT:         hint #25
-; V83A-NEXT:        paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
-; V8A:              hint #29
-; V8A-NEXT:         ret
-; V83A:             retaa
+; V8A-LABEL: a:
+; V8A:       // %bb.0: // %entry
+; V8A-NEXT:    hint #25
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 16
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    mov w0, #1 // =0x1
+; V8A-NEXT:    mov w1, #2 // =0x2
+; V8A-NEXT:    mov w2, #3 // =0x3
+; V8A-NEXT:    mov w3, #4 // =0x4
+; V8A-NEXT:    bl thunk_called_fn
+; V8A-NEXT:    add w0, w0, #8
+; V8A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V8A-NEXT:    hint #29
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: a:
+; V83A:       // %bb.0: // %entry
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 16
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    mov w0, #1 // =0x1
+; V83A-NEXT:    mov w1, #2 // =0x2
+; V83A-NEXT:    mov w2, #3 // =0x3
+; V83A-NEXT:    mov w3, #4 // =0x4
+; V83A-NEXT:    bl thunk_called_fn
+; V83A-NEXT:    add w0, w0, #8
+; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V83A-NEXT:    retaa
 entry:
   %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
   %cx = add i32 %call, 8
@@ -23,15 +47,38 @@ entry:
 }
 
 define i32 @b() #0 {
-; CHECK-LABEL:  b:                                      // @b
-; CHECK:        // %bb.0:                               // %entry
-; V8A-NEXT:         hint #25
-; V83A-NEXT:        paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
-; V8A:              hint #29
-; V8A-NEXT:         ret
-; V83A:             retaa
+; V8A-LABEL: b:
+; V8A:       // %bb.0: // %entry
+; V8A-NEXT:    hint #25
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 16
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    mov w0, #1 // =0x1
+; V8A-NEXT:    mov w1, #2 // =0x2
+; V8A-NEXT:    mov w2, #3 // =0x3
+; V8A-NEXT:    mov w3, #4 // =0x4
+; V8A-NEXT:    bl thunk_called_fn
+; V8A-NEXT:    add w0, w0, #88
+; V8A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V8A-NEXT:    hint #29
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: b:
+; V83A:       // %bb.0: // %entry
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 16
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    mov w0, #1 // =0x1
+; V83A-NEXT:    mov w1, #2 // =0x2
+; V83A-NEXT:    mov w2, #3 // =0x3
+; V83A-NEXT:    mov w3, #4 // =0x4
+; V83A-NEXT:    bl thunk_called_fn
+; V83A-NEXT:    add w0, w0, #88
+; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V83A-NEXT:    retaa
 entry:
   %call = tail call i32 @thunk_called_fn(i32 1, i32 2, i32 3, i32 4)
   %cx = add i32 %call, 88
@@ -39,15 +86,40 @@ entry:
 }
 
 define hidden i32 @c(ptr %fptr) #0 {
-; CHECK-LABEL:  c:                                      // @c
-; CHECK:        // %bb.0:                               // %entry
-; V8A-NEXT:         hint #25
-; V83A-NEXT:        paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
-; V8A:              hint #29
-; V8A-NEXT:         ret
-; V83A:             retaa
+; V8A-LABEL: c:
+; V8A:       // %bb.0: // %entry
+; V8A-NEXT:    hint #25
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 16
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    mov x8, x0
+; V8A-NEXT:    mov w0, #1 // =0x1
+; V8A-NEXT:    mov w1, #2 // =0x2
+; V8A-NEXT:    mov w2, #3 // =0x3
+; V8A-NEXT:    mov w3, #4 // =0x4
+; V8A-NEXT:    blr x8
+; V8A-NEXT:    add w0, w0, #8
+; V8A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V8A-NEXT:    hint #29
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: c:
+; V83A:       // %bb.0: // %entry
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 16
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    mov x8, x0
+; V83A-NEXT:    mov w0, #1 // =0x1
+; V83A-NEXT:    mov w1, #2 // =0x2
+; V83A-NEXT:    mov w2, #3 // =0x3
+; V83A-NEXT:    mov w3, #4 // =0x4
+; V83A-NEXT:    blr x8
+; V83A-NEXT:    add w0, w0, #8
+; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V83A-NEXT:    retaa
 entry:
   %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4)
   %add = add nsw i32 %call, 8
@@ -55,15 +127,40 @@ entry:
 }
 
 define hidden i32 @d(ptr %fptr) #0 {
-; CHECK-LABEL:  d:                                      // @d
-; CHECK:        // %bb.0:                               // %entry
-; V8A-NEXT:         hint #25
-; V83A-NEXT:        paciasp
-; CHECK:            .cfi_negate_ra_state
-; CHECK-NEXT:       .cfi_def_cfa_offset
-; V8A:              hint #29
-; V8A-NEXT:         ret
-; V83A:             retaa
+; V8A-LABEL: d:
+; V8A:       // %bb.0: // %entry
+; V8A-NEXT:    hint #25
+; V8A-NEXT:    .cfi_negate_ra_state
+; V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V8A-NEXT:    .cfi_def_cfa_offset 16
+; V8A-NEXT:    .cfi_offset w30, -16
+; V8A-NEXT:    mov x8, x0
+; V8A-NEXT:    mov w0, #1 // =0x1
+; V8A-NEXT:    mov w1, #2 // =0x2
+; V8A-NEXT:    mov w2, #3 // =0x3
+; V8A-NEXT:    mov w3, #4 // =0x4
+; V8A-NEXT:    blr x8
+; V8A-NEXT:    add w0, w0, #88
+; V8A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V8A-NEXT:    hint #29
+; V8A-NEXT:    ret
+;
+; V83A-LABEL: d:
+; V83A:       // %bb.0: // %entry
+; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; V83A-NEXT:    .cfi_def_cfa_offset 16
+; V83A-NEXT:    .cfi_offset w30, -16
+; V83A-NEXT:    mov x8, x0
+; V83A-NEXT:    mov w0, #1 // =0x1
+; V83A-NEXT:    mov w1, #2 // =0x2
+; V83A-NEXT:    mov w2, #3 // =0x3
+; V83A-NEXT:    mov w3, #4 // =0x4
+; V83A-NEXT:    blr x8
+; V83A-NEXT:    add w0, w0, #88
+; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; V83A-NEXT:    retaa
 entry:
   %call = tail call i32 %fptr(i32 1, i32 2, i32 3, i32 4)
   %add = add nsw i32 %call, 88
diff --git a/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir b/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir
index 406025c4fde3022..90ff68d30a3a0e5 100644
--- a/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir
+++ b/llvm/test/CodeGen/AArch64/mlicm-stack-write-check.mir
@@ -3,9 +3,6 @@
 ---
 name: test
 tracksRegLiveness: true
-isSSA: false
-registers:
-  - { id: 0, class: gpr64 }
 stack:
   - { id: 0, size: 8, type: spill-slot }
 body: |
@@ -30,14 +27,11 @@ body: |
 
   bb.2:
     liveins: $x0
-    %0 = COPY $x0
 ...
+
 ---
 name: test2
 tracksRegLiveness: true
-isSSA: false
-registers:
-  - { id: 0, class: gpr64 }
 stack:
   - { id: 0, size: 8, type: spill-slot }
 body: |
@@ -62,5 +56,4 @@ body: |
 
   bb.2:
     liveins: $x0
-    %0 = COPY $x0
 ...
diff --git a/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll b/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll
index 0969ec246399fe5..373c4969a9405c3 100644
--- a/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll
+++ b/llvm/test/CodeGen/AArch64/pacbti-llvm-generated-funcs-2.ll
@@ -35,8 +35,7 @@ entry:
 ;; CHECK-LABEL: __llvm_gcov_writeout:
 ;; CHECK:       .cfi_b_key_frame
 ;; CHECK-NEXT:  pacibsp
-;; CHECK:       .cfi_negate_ra_state
-;; CHECK-NEXT:  .cfi_def_cfa_offset
+;; CHECK-NEXT:  .cfi_negate_ra_state
 
 define internal void @__llvm_gcov_reset() unnamed_addr #2 {
 entry:
diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
index eb224bbbd601fbc..4d4b7c215b978ae 100644
--- a/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
+++ b/llvm/test/CodeGen/AArch64/sign-return-address-cfi-negate-ra-state.ll
@@ -10,8 +10,8 @@ define dso_local i32 @_Z3fooi(i32 %x) #0 {
 ; CHECK-V8A-LABEL: _Z3fooi:
 ; CHECK-V8A:       // %bb.0: // %entry
 ; CHECK-V8A-NEXT:    hint #25
-; CHECK-V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V8A-NEXT:    .cfi_negate_ra_state
+; CHECK-V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V8A-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-V8A-NEXT:    .cfi_offset w30, -16
 ; CHECK-V8A-NEXT:    str w0, [sp, #8]
@@ -28,8 +28,8 @@ define dso_local i32 @_Z3fooi(i32 %x) #0 {
 ; CHECK-V83A-LABEL: _Z3fooi:
 ; CHECK-V83A:       // %bb.0: // %entry
 ; CHECK-V83A-NEXT:    paciasp
-; CHECK-V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V83A-NEXT:    .cfi_negate_ra_state
+; CHECK-V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V83A-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-V83A-NEXT:    .cfi_offset w30, -16
 ; CHECK-V83A-NEXT:    str w0, [sp, #8]
@@ -144,8 +144,8 @@ define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) {
 ; CHECK-V8A-LABEL: baz_sync:
 ; CHECK-V8A:       // %bb.0: // %entry
 ; CHECK-V8A-NEXT:    hint #25
-; CHECK-V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V8A-NEXT:    .cfi_negate_ra_state
+; CHECK-V8A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V8A-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-V8A-NEXT:    .cfi_offset w30, -16
 ; CHECK-V8A-NEXT:    cbz w0, .LBB2_2
@@ -165,8 +165,8 @@ define hidden noundef i32 @baz_sync(i32 noundef %a) #0 uwtable(sync) {
 ; CHECK-V83A-LABEL: baz_sync:
 ; CHECK-V83A:       // %bb.0: // %entry
 ; CHECK-V83A-NEXT:    paciasp
-; CHECK-V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V83A-NEXT:    .cfi_negate_ra_state
+; CHECK-V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-V83A-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-V83A-NEXT:    .cfi_offset w30, -16
 ; CHECK-V83A-NEXT:    cbz w0, .LBB2_2
@@ -229,7 +229,6 @@ attributes #0 = { "sign-return-address"="all" }
 ; CHECK-DUMP:   DW_CFA_restore_state:
 ; CHECK-DUMP:   DW_CFA_AARCH64_negate_ra_state:
 
-; CHECK-DUMP: CFA=WSP{{$}}
 ;; First DW_CFA_AARCH64_negate_ra_state:
 ; CHECK-DUMP: reg34=1
 ;; Second DW_CFA_AARCH64_negate_ra_state:
@@ -238,7 +237,7 @@ attributes #0 = { "sign-return-address"="all" }
 ; CHECK-DUMP: reg34=1
 ;; Third DW_CFA_AARCH64_negate_ra_state:
 ; CHECK-DUMP: reg34=0
-; CHECK-DUMP-NOT: reg34=
+; CHECK-DUMP-NOT: reg34=1
 
 ; baz_sync
 ; CHECK-DUMP-LABEL: FDE
diff --git a/llvm/test/CodeGen/AArch64/sign-return-address-pauth-lr.ll b/llvm/test/CodeGen/AArch64/sign-return-address-pauth-lr.ll
index 3d133e02106bc8b..fa689d2b9d7fddc 100644
--- a/llvm/test/CodeGen/AArch64/sign-return-address-pauth-lr.ll
+++ b/llvm/test/CodeGen/AArch64/sign-return-address-pauth-lr.ll
@@ -62,8 +62,9 @@ define i32 @leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-addr
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp0:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    .cfi_negate_ra_state
-; COMPAT-NEXT:    adr x16, .Ltmp0
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:    adrp x16, .Ltmp0
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp0
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    ret
@@ -73,8 +74,9 @@ define i32 @leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-addr
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp0:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    .cfi_negate_ra_state
-; V83A-NEXT:    adr x16, .Ltmp0
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:    adrp x16, .Ltmp0
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp0
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retaa
 ;
@@ -82,7 +84,9 @@ define i32 @leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-addr
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp0:
 ; PAUTHLR-NEXT:    paciasppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp0
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp0
 ; PAUTHLR-NEXT:    retaasppc .Ltmp0
   ret i32 %x
 }
@@ -93,15 +97,16 @@ define i64 @leaf_clobbers_lr(i64 %x) "branch-protection-pauth-lr" "sign-return-a
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp1:
 ; COMPAT-NEXT:    hint #25
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
 ; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; COMPAT-NEXT:    .cfi_negate_ra_state
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    //APP
 ; COMPAT-NEXT:    mov x30, x0
 ; COMPAT-NEXT:    //NO_APP
 ; COMPAT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; COMPAT-NEXT:    adr x16, .Ltmp1
+; COMPAT-NEXT:    adrp x16, .Ltmp1
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp1
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    ret
@@ -111,15 +116,16 @@ define i64 @leaf_clobbers_lr(i64 %x) "branch-protection-pauth-lr" "sign-return-a
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp1:
 ; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
 ; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; V83A-NEXT:    .cfi_negate_ra_state
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    //APP
 ; V83A-NEXT:    mov x30, x0
 ; V83A-NEXT:    //NO_APP
 ; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; V83A-NEXT:    adr x16, .Ltmp1
+; V83A-NEXT:    adrp x16, .Ltmp1
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp1
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retaa
 ;
@@ -127,14 +133,16 @@ define i64 @leaf_clobbers_lr(i64 %x) "branch-protection-pauth-lr" "sign-return-a
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp1:
 ; PAUTHLR-NEXT:    paciasppc
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
 ; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
 ; PAUTHLR-NEXT:    .cfi_offset w30, -16
 ; PAUTHLR-NEXT:    //APP
 ; PAUTHLR-NEXT:    mov x30, x0
 ; PAUTHLR-NEXT:    //NO_APP
 ; PAUTHLR-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; PAUTHLR-NEXT:    adrp x16, .Ltmp1
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp1
 ; PAUTHLR-NEXT:    retaasppc .Ltmp1
   call void asm sideeffect "mov x30, $0", "r,~{lr}"(i64 %x) #1
   ret i64 %x
@@ -148,13 +156,14 @@ define i32 @non_leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp2:
 ; COMPAT-NEXT:    hint #25
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
 ; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; COMPAT-NEXT:    .cfi_negate_ra_state
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    bl foo
 ; COMPAT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; COMPAT-NEXT:    adr x16, .Ltmp2
+; COMPAT-NEXT:    adrp x16, .Ltmp2
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp2
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    ret
@@ -164,13 +173,14 @@ define i32 @non_leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp2:
 ; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
 ; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; V83A-NEXT:    .cfi_negate_ra_state
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    bl foo
 ; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; V83A-NEXT:    adr x16, .Ltmp2
+; V83A-NEXT:    adrp x16, .Ltmp2
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp2
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retaa
 ;
@@ -178,12 +188,14 @@ define i32 @non_leaf_sign_all(i32 %x) "branch-protection-pauth-lr" "sign-return-
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp2:
 ; PAUTHLR-NEXT:    paciasppc
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
 ; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
 ; PAUTHLR-NEXT:    .cfi_offset w30, -16
 ; PAUTHLR-NEXT:    bl foo
 ; PAUTHLR-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; PAUTHLR-NEXT:    adrp x16, .Ltmp2
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp2
 ; PAUTHLR-NEXT:    retaasppc .Ltmp2
   %call = call i32 @foo(i32 %x)
   ret i32 %call
@@ -195,13 +207,14 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "branch-protection-pauth-lr" "sign-re
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp3:
 ; COMPAT-NEXT:    hint #25
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
 ; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; COMPAT-NEXT:    .cfi_negate_ra_state
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    bl foo
 ; COMPAT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; COMPAT-NEXT:    adr x16, .Ltmp3
+; COMPAT-NEXT:    adrp x16, .Ltmp3
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp3
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    ret
@@ -211,13 +224,14 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "branch-protection-pauth-lr" "sign-re
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp3:
 ; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
 ; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; V83A-NEXT:    .cfi_negate_ra_state
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    bl foo
 ; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; V83A-NEXT:    adr x16, .Ltmp3
+; V83A-NEXT:    adrp x16, .Ltmp3
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp3
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retaa
 ;
@@ -225,12 +239,14 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "branch-protection-pauth-lr" "sign-re
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp3:
 ; PAUTHLR-NEXT:    paciasppc
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
 ; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
 ; PAUTHLR-NEXT:    .cfi_offset w30, -16
 ; PAUTHLR-NEXT:    bl foo
 ; PAUTHLR-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; PAUTHLR-NEXT:    adrp x16, .Ltmp3
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp3
 ; PAUTHLR-NEXT:    retaasppc .Ltmp3
   %call = call i32 @foo(i32 %x)
   ret i32 %call
@@ -245,13 +261,14 @@ define i32 @non_leaf_scs(i32 %x) "branch-protection-pauth-lr" "sign-return-addre
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:  .Ltmp4:
 ; CHECK-NEXT:    paciasp
+; CHECK-NEXT:    .cfi_negate_ra_state_with_pc
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_negate_ra_state
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl foo
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT:    adr x16, .Ltmp4
+; CHECK-NEXT:    adrp x16, .Ltmp4
+; CHECK-NEXT:    add x16, x16, :lo12:.Ltmp4
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:    autiasp
 ; CHECK-NEXT:    ldr x30, [x18, #-8]!
@@ -263,12 +280,14 @@ define i32 @non_leaf_scs(i32 %x) "branch-protection-pauth-lr" "sign-return-addre
 ; PAUTHLR-NEXT:    .cfi_escape 0x16, 0x12, 0x02, 0x82, 0x78 //
 ; PAUTHLR-NEXT:  .Ltmp4:
 ; PAUTHLR-NEXT:    paciasppc
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
 ; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
 ; PAUTHLR-NEXT:    .cfi_offset w30, -16
 ; PAUTHLR-NEXT:    bl foo
 ; PAUTHLR-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; PAUTHLR-NEXT:    adrp x16, .Ltmp4
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp4
 ; PAUTHLR-NEXT:    autiasppc .Ltmp4
 ; PAUTHLR-NEXT:    ldr x30, [x18, #-8]!
 ; PAUTHLR-NEXT:    ret
@@ -282,8 +301,9 @@ define i32 @leaf_sign_all_v83(i32 %x) "branch-protection-pauth-lr" "sign-return-
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:  .Ltmp5:
 ; CHECK-NEXT:    paciasp
-; CHECK-NEXT:    .cfi_negate_ra_state
-; CHECK-NEXT:    adr x16, .Ltmp5
+; CHECK-NEXT:    .cfi_negate_ra_state_with_pc
+; CHECK-NEXT:    adrp x16, .Ltmp5
+; CHECK-NEXT:    add x16, x16, :lo12:.Ltmp5
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:    retaa
 ;
@@ -291,7 +311,9 @@ define i32 @leaf_sign_all_v83(i32 %x) "branch-protection-pauth-lr" "sign-return-
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp5:
 ; PAUTHLR-NEXT:    paciasppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp5
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp5
 ; PAUTHLR-NEXT:    retaasppc .Ltmp5
   ret i32 %x
 }
@@ -304,15 +326,16 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "branch-protection-pauth-lr"
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp6:
 ; COMPAT-NEXT:    hint #25
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
 ; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; COMPAT-NEXT:    .cfi_negate_ra_state
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    //APP
 ; COMPAT-NEXT:    mov x30, x0
 ; COMPAT-NEXT:    //NO_APP
 ; COMPAT-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; COMPAT-NEXT:    adr x16, .Ltmp6
+; COMPAT-NEXT:    adrp x16, .Ltmp6
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp6
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    b bar
@@ -322,15 +345,16 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "branch-protection-pauth-lr"
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp6:
 ; V83A-NEXT:    paciasp
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
 ; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; V83A-NEXT:    .cfi_negate_ra_state
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    //APP
 ; V83A-NEXT:    mov x30, x0
 ; V83A-NEXT:    //NO_APP
 ; V83A-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
-; V83A-NEXT:    adr x16, .Ltmp6
+; V83A-NEXT:    adrp x16, .Ltmp6
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp6
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    autiasp
 ; V83A-NEXT:    b bar
@@ -339,14 +363,16 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "branch-protection-pauth-lr"
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp6:
 ; PAUTHLR-NEXT:    paciasppc
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
 ; PAUTHLR-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
 ; PAUTHLR-NEXT:    .cfi_def_cfa_offset 16
 ; PAUTHLR-NEXT:    .cfi_offset w30, -16
 ; PAUTHLR-NEXT:    //APP
 ; PAUTHLR-NEXT:    mov x30, x0
 ; PAUTHLR-NEXT:    //NO_APP
 ; PAUTHLR-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; PAUTHLR-NEXT:    adrp x16, .Ltmp6
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp6
 ; PAUTHLR-NEXT:    autiasppc .Ltmp6
 ; PAUTHLR-NEXT:    b bar
   call void asm sideeffect "mov x30, $0", "r,~{lr}"(i64 %x) #1
@@ -360,8 +386,9 @@ define i32 @leaf_sign_all_a_key(i32 %x) "branch-protection-pauth-lr" "sign-retur
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp7:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    .cfi_negate_ra_state
-; COMPAT-NEXT:    adr x16, .Ltmp7
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:    adrp x16, .Ltmp7
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp7
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    ret
@@ -371,8 +398,9 @@ define i32 @leaf_sign_all_a_key(i32 %x) "branch-protection-pauth-lr" "sign-retur
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp7:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    .cfi_negate_ra_state
-; V83A-NEXT:    adr x16, .Ltmp7
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:    adrp x16, .Ltmp7
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp7
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retaa
 ;
@@ -380,7 +408,9 @@ define i32 @leaf_sign_all_a_key(i32 %x) "branch-protection-pauth-lr" "sign-retur
 ; PAUTHLR:       // %bb.0:
 ; PAUTHLR-NEXT:  .Ltmp7:
 ; PAUTHLR-NEXT:    paciasppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp7
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp7
 ; PAUTHLR-NEXT:    retaasppc .Ltmp7
   ret i32 %x
 }
@@ -392,8 +422,9 @@ define i32 @leaf_sign_all_b_key(i32 %x) "branch-protection-pauth-lr" "sign-retur
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp8:
 ; COMPAT-NEXT:    hint #27
-; COMPAT-NEXT:    .cfi_negate_ra_state
-; COMPAT-NEXT:    adr x16, .Ltmp8
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:    adrp x16, .Ltmp8
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp8
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #31
 ; COMPAT-NEXT:    ret
@@ -404,8 +435,9 @@ define i32 @leaf_sign_all_b_key(i32 %x) "branch-protection-pauth-lr" "sign-retur
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp8:
 ; V83A-NEXT:    pacibsp
-; V83A-NEXT:    .cfi_negate_ra_state
-; V83A-NEXT:    adr x16, .Ltmp8
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:    adrp x16, .Ltmp8
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp8
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retab
 ;
@@ -414,7 +446,9 @@ define i32 @leaf_sign_all_b_key(i32 %x) "branch-protection-pauth-lr" "sign-retur
 ; PAUTHLR-NEXT:    .cfi_b_key_frame
 ; PAUTHLR-NEXT:  .Ltmp8:
 ; PAUTHLR-NEXT:    pacibsppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp8
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp8
 ; PAUTHLR-NEXT:    retabsppc .Ltmp8
   ret i32 %x
 }
@@ -426,8 +460,9 @@ define i32 @leaf_sign_all_v83_b_key(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:  .Ltmp9:
 ; CHECK-NEXT:    pacibsp
-; CHECK-NEXT:    .cfi_negate_ra_state
-; CHECK-NEXT:    adr x16, .Ltmp9
+; CHECK-NEXT:    .cfi_negate_ra_state_with_pc
+; CHECK-NEXT:    adrp x16, .Ltmp9
+; CHECK-NEXT:    add x16, x16, :lo12:.Ltmp9
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:    retab
 ;
@@ -436,7 +471,9 @@ define i32 @leaf_sign_all_v83_b_key(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; PAUTHLR-NEXT:    .cfi_b_key_frame
 ; PAUTHLR-NEXT:  .Ltmp9:
 ; PAUTHLR-NEXT:    pacibsppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp9
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp9
 ; PAUTHLR-NEXT:    retabsppc .Ltmp9
   ret i32 %x
 }
@@ -449,8 +486,9 @@ define i32 @leaf_sign_all_a_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp10:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    .cfi_negate_ra_state
-; COMPAT-NEXT:    adr x16, .Ltmp10
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:    adrp x16, .Ltmp10
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp10
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #29
 ; COMPAT-NEXT:    ret
@@ -461,8 +499,9 @@ define i32 @leaf_sign_all_a_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp10:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    .cfi_negate_ra_state
-; V83A-NEXT:    adr x16, .Ltmp10
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:    adrp x16, .Ltmp10
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp10
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retaa
 ;
@@ -471,7 +510,9 @@ define i32 @leaf_sign_all_a_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; PAUTHLR-NEXT:    bti c
 ; PAUTHLR-NEXT:  .Ltmp10:
 ; PAUTHLR-NEXT:    paciasppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp10
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp10
 ; PAUTHLR-NEXT:    retaasppc .Ltmp10
   ret i32 %x
 }
@@ -485,8 +526,9 @@ define i32 @leaf_sign_all_b_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:  .Ltmp11:
 ; COMPAT-NEXT:    hint #27
-; COMPAT-NEXT:    .cfi_negate_ra_state
-; COMPAT-NEXT:    adr x16, .Ltmp11
+; COMPAT-NEXT:    .cfi_negate_ra_state_with_pc
+; COMPAT-NEXT:    adrp x16, .Ltmp11
+; COMPAT-NEXT:    add x16, x16, :lo12:.Ltmp11
 ; COMPAT-NEXT:    hint #39
 ; COMPAT-NEXT:    hint #31
 ; COMPAT-NEXT:    ret
@@ -498,8 +540,9 @@ define i32 @leaf_sign_all_b_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:  .Ltmp11:
 ; V83A-NEXT:    pacibsp
-; V83A-NEXT:    .cfi_negate_ra_state
-; V83A-NEXT:    adr x16, .Ltmp11
+; V83A-NEXT:    .cfi_negate_ra_state_with_pc
+; V83A-NEXT:    adrp x16, .Ltmp11
+; V83A-NEXT:    add x16, x16, :lo12:.Ltmp11
 ; V83A-NEXT:    hint #39
 ; V83A-NEXT:    retab
 ;
@@ -509,7 +552,9 @@ define i32 @leaf_sign_all_b_key_bti(i32 %x) "branch-protection-pauth-lr" "sign-r
 ; PAUTHLR-NEXT:    .cfi_b_key_frame
 ; PAUTHLR-NEXT:  .Ltmp11:
 ; PAUTHLR-NEXT:    pacibsppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp11
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp11
 ; PAUTHLR-NEXT:    retabsppc .Ltmp11
   ret i32 %x
 }
@@ -523,8 +568,9 @@ define i32 @leaf_sign_all_v83_b_key_bti(i32 %x) "branch-protection-pauth-lr" "si
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:  .Ltmp12:
 ; CHECK-NEXT:    pacibsp
-; CHECK-NEXT:    .cfi_negate_ra_state
-; CHECK-NEXT:    adr x16, .Ltmp12
+; CHECK-NEXT:    .cfi_negate_ra_state_with_pc
+; CHECK-NEXT:    adrp x16, .Ltmp12
+; CHECK-NEXT:    add x16, x16, :lo12:.Ltmp12
 ; CHECK-NEXT:    hint #39
 ; CHECK-NEXT:    retab
 ;
@@ -534,7 +580,9 @@ define i32 @leaf_sign_all_v83_b_key_bti(i32 %x) "branch-protection-pauth-lr" "si
 ; PAUTHLR-NEXT:    .cfi_b_key_frame
 ; PAUTHLR-NEXT:  .Ltmp12:
 ; PAUTHLR-NEXT:    pacibsppc
-; PAUTHLR-NEXT:    .cfi_negate_ra_state
+; PAUTHLR-NEXT:    .cfi_negate_ra_state_with_pc
+; PAUTHLR-NEXT:    adrp x16, .Ltmp12
+; PAUTHLR-NEXT:    add x16, x16, :lo12:.Ltmp12
 ; PAUTHLR-NEXT:    retabsppc .Ltmp12
   ret i32 %x
 }
diff --git a/llvm/test/CodeGen/AArch64/sign-return-address.ll b/llvm/test/CodeGen/AArch64/sign-return-address.ll
index c33463eb96a687c..dafe0d71ceb5f76 100644
--- a/llvm/test/CodeGen/AArch64/sign-return-address.ll
+++ b/llvm/test/CodeGen/AArch64/sign-return-address.ll
@@ -46,8 +46,8 @@ define i64 @leaf_clobbers_lr(i64 %x) "sign-return-address"="non-leaf"  {
 ; COMPAT-LABEL: leaf_clobbers_lr:
 ; COMPAT:       // %bb.0:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_negate_ra_state
+; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    //APP
@@ -60,8 +60,8 @@ define i64 @leaf_clobbers_lr(i64 %x) "sign-return-address"="non-leaf"  {
 ; V83A-LABEL: leaf_clobbers_lr:
 ; V83A:       // %bb.0:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    //APP
@@ -79,8 +79,8 @@ define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" {
 ; COMPAT-LABEL: non_leaf_sign_all:
 ; COMPAT:       // %bb.0:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_negate_ra_state
+; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    bl foo
@@ -91,8 +91,8 @@ define i32 @non_leaf_sign_all(i32 %x) "sign-return-address"="all" {
 ; V83A-LABEL: non_leaf_sign_all:
 ; V83A:       // %bb.0:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    bl foo
@@ -106,8 +106,8 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf"  {
 ; COMPAT-LABEL: non_leaf_sign_non_leaf:
 ; COMPAT:       // %bb.0:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_negate_ra_state
+; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    bl foo
@@ -118,8 +118,8 @@ define i32 @non_leaf_sign_non_leaf(i32 %x) "sign-return-address"="non-leaf"  {
 ; V83A-LABEL: non_leaf_sign_non_leaf:
 ; V83A:       // %bb.0:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    bl foo
@@ -136,8 +136,8 @@ define i32 @non_leaf_scs(i32 %x) "sign-return-address"="non-leaf" shadowcallstac
 ; CHECK-NEXT:    str x30, [x18], #8
 ; CHECK-NEXT:    .cfi_escape 0x16, 0x12, 0x02, 0x82, 0x78 //
 ; CHECK-NEXT:    paciasp
-; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_negate_ra_state
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl foo
@@ -164,8 +164,8 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" {
 ; COMPAT-LABEL: spill_lr_and_tail_call:
 ; COMPAT:       // %bb.0:
 ; COMPAT-NEXT:    hint #25
-; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_negate_ra_state
+; COMPAT-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; COMPAT-NEXT:    .cfi_def_cfa_offset 16
 ; COMPAT-NEXT:    .cfi_offset w30, -16
 ; COMPAT-NEXT:    //APP
@@ -178,8 +178,8 @@ define fastcc void @spill_lr_and_tail_call(i64 %x) "sign-return-address"="all" {
 ; V83A-LABEL: spill_lr_and_tail_call:
 ; V83A:       // %bb.0:
 ; V83A-NEXT:    paciasp
-; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_negate_ra_state
+; V83A-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; V83A-NEXT:    .cfi_def_cfa_offset 16
 ; V83A-NEXT:    .cfi_offset w30, -16
 ; V83A-NEXT:    //APP
diff --git a/llvm/test/CodeGen/AArch64/srem-lkk.ll b/llvm/test/CodeGen/AArch64/srem-lkk.ll
index 5ff178937ebbfb4..d9f91449dffb808 100644
--- a/llvm/test/CodeGen/AArch64/srem-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/srem-lkk.ll
@@ -4,14 +4,14 @@
 define i32 @fold_srem_positive_odd(i32 %x) {
 ; CHECK-LABEL: fold_srem_positive_odd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #37253
+; CHECK-NEXT:    mov w8, #37253 // =0x9185
 ; CHECK-NEXT:    movk w8, #44150, lsl #16
 ; CHECK-NEXT:    smull x8, w0, w8
 ; CHECK-NEXT:    lsr x8, x8, #32
 ; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    asr w9, w8, #6
 ; CHECK-NEXT:    add w8, w9, w8, lsr #31
-; CHECK-NEXT:    mov w9, #95
+; CHECK-NEXT:    mov w9, #95 // =0x5f
 ; CHECK-NEXT:    msub w0, w8, w9, w0
 ; CHECK-NEXT:    ret
   %1 = srem i32 %x, 95
@@ -22,13 +22,13 @@ define i32 @fold_srem_positive_odd(i32 %x) {
 define i32 @fold_srem_positive_even(i32 %x) {
 ; CHECK-LABEL: fold_srem_positive_even:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #36849
+; CHECK-NEXT:    mov w8, #36849 // =0x8ff1
 ; CHECK-NEXT:    movk w8, #15827, lsl #16
 ; CHECK-NEXT:    smull x8, w0, w8
 ; CHECK-NEXT:    lsr x9, x8, #63
 ; CHECK-NEXT:    asr x8, x8, #40
 ; CHECK-NEXT:    add w8, w8, w9
-; CHECK-NEXT:    mov w9, #1060
+; CHECK-NEXT:    mov w9, #1060 // =0x424
 ; CHECK-NEXT:    msub w0, w8, w9, w0
 ; CHECK-NEXT:    ret
   %1 = srem i32 %x, 1060
@@ -39,13 +39,13 @@ define i32 @fold_srem_positive_even(i32 %x) {
 define i32 @fold_srem_negative_odd(i32 %x) {
 ; CHECK-LABEL: fold_srem_negative_odd:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #65445
+; CHECK-NEXT:    mov w8, #65445 // =0xffa5
 ; CHECK-NEXT:    movk w8, #42330, lsl #16
 ; CHECK-NEXT:    smull x8, w0, w8
 ; CHECK-NEXT:    lsr x9, x8, #63
 ; CHECK-NEXT:    asr x8, x8, #40
 ; CHECK-NEXT:    add w8, w8, w9
-; CHECK-NEXT:    mov w9, #-723
+; CHECK-NEXT:    mov w9, #-723 // =0xfffffd2d
 ; CHECK-NEXT:    msub w0, w8, w9, w0
 ; CHECK-NEXT:    ret
   %1 = srem i32 %x, -723
@@ -56,13 +56,13 @@ define i32 @fold_srem_negative_odd(i32 %x) {
 define i32 @fold_srem_negative_even(i32 %x) {
 ; CHECK-LABEL: fold_srem_negative_even:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #62439
+; CHECK-NEXT:    mov w8, #62439 // =0xf3e7
 ; CHECK-NEXT:    movk w8, #64805, lsl #16
 ; CHECK-NEXT:    smull x8, w0, w8
 ; CHECK-NEXT:    lsr x9, x8, #63
 ; CHECK-NEXT:    asr x8, x8, #40
 ; CHECK-NEXT:    add w8, w8, w9
-; CHECK-NEXT:    mov w9, #-22981
+; CHECK-NEXT:    mov w9, #-22981 // =0xffffa63b
 ; CHECK-NEXT:    msub w0, w8, w9, w0
 ; CHECK-NEXT:    ret
   %1 = srem i32 %x, -22981
@@ -74,14 +74,14 @@ define i32 @fold_srem_negative_even(i32 %x) {
 define i32 @combine_srem_sdiv(i32 %x) {
 ; CHECK-LABEL: combine_srem_sdiv:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #37253
+; CHECK-NEXT:    mov w8, #37253 // =0x9185
 ; CHECK-NEXT:    movk w8, #44150, lsl #16
 ; CHECK-NEXT:    smull x8, w0, w8
 ; CHECK-NEXT:    lsr x8, x8, #32
 ; CHECK-NEXT:    add w8, w8, w0
 ; CHECK-NEXT:    asr w9, w8, #6
 ; CHECK-NEXT:    add w8, w9, w8, lsr #31
-; CHECK-NEXT:    mov w9, #95
+; CHECK-NEXT:    mov w9, #95 // =0x5f
 ; CHECK-NEXT:    msub w9, w8, w9, w0
 ; CHECK-NEXT:    add w0, w9, w8
 ; CHECK-NEXT:    ret
@@ -95,14 +95,14 @@ define i32 @combine_srem_sdiv(i32 %x) {
 define i64 @dont_fold_srem_i64(i64 %x) {
 ; CHECK-LABEL: dont_fold_srem_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #58849
+; CHECK-NEXT:    mov x8, #58849 // =0xe5e1
 ; CHECK-NEXT:    movk x8, #48148, lsl #16
 ; CHECK-NEXT:    movk x8, #33436, lsl #32
 ; CHECK-NEXT:    movk x8, #21399, lsl #48
 ; CHECK-NEXT:    smulh x8, x0, x8
 ; CHECK-NEXT:    asr x9, x8, #5
 ; CHECK-NEXT:    add x8, x9, x8, lsr #63
-; CHECK-NEXT:    mov w9, #98
+; CHECK-NEXT:    mov w9, #98 // =0x62
 ; CHECK-NEXT:    msub x0, x8, x9, x0
 ; CHECK-NEXT:    ret
   %1 = srem i64 %x, 98
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll
index 7f23434c9dfdd6c..75d4d8816fb30d2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=19 %s -o - | FileCheck -check-prefix=LOOP %s
-; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=21 %s -o - | FileCheck -check-prefix=UNROLL %s
+; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -amdgpu-memcpy-loop-unroll=2 -mem-intrinsic-expand-size=35 %s -o - | FileCheck -check-prefix=LOOP %s
+; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -amdgpu-memcpy-loop-unroll=2 -mem-intrinsic-expand-size=37 %s -o - | FileCheck -check-prefix=UNROLL %s
 
 declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1 immarg)
 
@@ -14,104 +14,176 @@ define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src)
 ; LOOP-NEXT:    v_mov_b32_e32 v4, s0
 ; LOOP-NEXT:  .LBB0_1: ; %load-store-loop
 ; LOOP-NEXT:    ; =>This Inner Loop Header: Depth=1
+; LOOP-NEXT:    s_waitcnt expcnt(2)
+; LOOP-NEXT:    v_add_i32_e32 v29, vcc, v2, v4
+; LOOP-NEXT:    v_addc_u32_e32 v30, vcc, v3, v5, vcc
+; LOOP-NEXT:    buffer_load_ubyte v24, v[29:30], s[0:3], 0 addr64
+; LOOP-NEXT:    buffer_load_ubyte v27, v[29:30], s[0:3], 0 addr64 offset:1
+; LOOP-NEXT:    buffer_load_ubyte v34, v[29:30], s[0:3], 0 addr64 offset:2
+; LOOP-NEXT:    buffer_load_ubyte v35, v[29:30], s[0:3], 0 addr64 offset:3
+; LOOP-NEXT:    buffer_load_ubyte v36, v[29:30], s[0:3], 0 addr64 offset:4
+; LOOP-NEXT:    buffer_load_ubyte v37, v[29:30], s[0:3], 0 addr64 offset:5
+; LOOP-NEXT:    buffer_load_ubyte v38, v[29:30], s[0:3], 0 addr64 offset:6
+; LOOP-NEXT:    buffer_load_ubyte v39, v[29:30], s[0:3], 0 addr64 offset:7
+; LOOP-NEXT:    buffer_load_ubyte v6, v[29:30], s[0:3], 0 addr64 offset:8
+; LOOP-NEXT:    buffer_load_ubyte v9, v[29:30], s[0:3], 0 addr64 offset:9
+; LOOP-NEXT:    buffer_load_ubyte v10, v[29:30], s[0:3], 0 addr64 offset:10
 ; LOOP-NEXT:    s_waitcnt expcnt(0)
-; LOOP-NEXT:    v_add_i32_e32 v6, vcc, v2, v4
-; LOOP-NEXT:    v_addc_u32_e32 v7, vcc, v3, v5, vcc
-; LOOP-NEXT:    v_add_i32_e32 v8, vcc, v0, v4
-; LOOP-NEXT:    v_addc_u32_e32 v9, vcc, v1, v5, vcc
-; LOOP-NEXT:    v_add_i32_e32 v4, vcc, 16, v4
-; LOOP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
-; LOOP-NEXT:    buffer_load_ubyte v10, v[6:7], s[0:3], 0 addr64
-; LOOP-NEXT:    buffer_load_ubyte v11, v[6:7], s[0:3], 0 addr64 offset:1
-; LOOP-NEXT:    buffer_load_ubyte v12, v[6:7], s[0:3], 0 addr64 offset:2
-; LOOP-NEXT:    buffer_load_ubyte v13, v[6:7], s[0:3], 0 addr64 offset:3
-; LOOP-NEXT:    buffer_load_ubyte v14, v[6:7], s[0:3], 0 addr64 offset:4
-; LOOP-NEXT:    buffer_load_ubyte v15, v[6:7], s[0:3], 0 addr64 offset:5
-; LOOP-NEXT:    buffer_load_ubyte v16, v[6:7], s[0:3], 0 addr64 offset:6
-; LOOP-NEXT:    buffer_load_ubyte v17, v[6:7], s[0:3], 0 addr64 offset:7
-; LOOP-NEXT:    buffer_load_ubyte v18, v[6:7], s[0:3], 0 addr64 offset:8
-; LOOP-NEXT:    buffer_load_ubyte v19, v[6:7], s[0:3], 0 addr64 offset:9
-; LOOP-NEXT:    buffer_load_ubyte v20, v[6:7], s[0:3], 0 addr64 offset:10
-; LOOP-NEXT:    buffer_load_ubyte v21, v[6:7], s[0:3], 0 addr64 offset:11
-; LOOP-NEXT:    buffer_load_ubyte v22, v[6:7], s[0:3], 0 addr64 offset:12
-; LOOP-NEXT:    buffer_load_ubyte v23, v[6:7], s[0:3], 0 addr64 offset:13
-; LOOP-NEXT:    buffer_load_ubyte v24, v[6:7], s[0:3], 0 addr64 offset:14
-; LOOP-NEXT:    buffer_load_ubyte v6, v[6:7], s[0:3], 0 addr64 offset:15
-; LOOP-NEXT:    v_cmp_gt_u32_e32 vcc, 16, v4
+; LOOP-NEXT:    buffer_load_ubyte v11, v[29:30], s[0:3], 0 addr64 offset:11
+; LOOP-NEXT:    buffer_load_ubyte v7, v[29:30], s[0:3], 0 addr64 offset:12
+; LOOP-NEXT:    buffer_load_ubyte v13, v[29:30], s[0:3], 0 addr64 offset:13
+; LOOP-NEXT:    buffer_load_ubyte v14, v[29:30], s[0:3], 0 addr64 offset:14
+; LOOP-NEXT:    buffer_load_ubyte v15, v[29:30], s[0:3], 0 addr64 offset:15
+; LOOP-NEXT:    buffer_load_ubyte v8, v[29:30], s[0:3], 0 addr64 offset:16
+; LOOP-NEXT:    buffer_load_ubyte v17, v[29:30], s[0:3], 0 addr64 offset:17
+; LOOP-NEXT:    buffer_load_ubyte v18, v[29:30], s[0:3], 0 addr64 offset:18
+; LOOP-NEXT:    buffer_load_ubyte v19, v[29:30], s[0:3], 0 addr64 offset:19
+; LOOP-NEXT:    buffer_load_ubyte v12, v[29:30], s[0:3], 0 addr64 offset:20
+; LOOP-NEXT:    buffer_load_ubyte v21, v[29:30], s[0:3], 0 addr64 offset:21
+; LOOP-NEXT:    buffer_load_ubyte v22, v[29:30], s[0:3], 0 addr64 offset:22
+; LOOP-NEXT:    buffer_load_ubyte v23, v[29:30], s[0:3], 0 addr64 offset:23
+; LOOP-NEXT:    buffer_load_ubyte v16, v[29:30], s[0:3], 0 addr64 offset:24
+; LOOP-NEXT:    buffer_load_ubyte v25, v[29:30], s[0:3], 0 addr64 offset:25
+; LOOP-NEXT:    buffer_load_ubyte v26, v[29:30], s[0:3], 0 addr64 offset:26
+; LOOP-NEXT:    buffer_load_ubyte v28, v[29:30], s[0:3], 0 addr64 offset:27
+; LOOP-NEXT:    buffer_load_ubyte v20, v[29:30], s[0:3], 0 addr64 offset:28
+; LOOP-NEXT:    buffer_load_ubyte v31, v[29:30], s[0:3], 0 addr64 offset:29
+; LOOP-NEXT:    buffer_load_ubyte v32, v[29:30], s[0:3], 0 addr64 offset:30
+; LOOP-NEXT:    buffer_load_ubyte v33, v[29:30], s[0:3], 0 addr64 offset:31
 ; LOOP-NEXT:    s_waitcnt vmcnt(14)
-; LOOP-NEXT:    v_lshlrev_b32_e32 v7, 8, v11
+; LOOP-NEXT:    v_lshlrev_b32_e32 v27, 8, v27
+; LOOP-NEXT:    v_or_b32_e32 v24, v27, v24
+; LOOP-NEXT:    v_lshlrev_b32_e32 v27, 24, v35
+; LOOP-NEXT:    v_lshlrev_b32_e32 v29, 16, v34
+; LOOP-NEXT:    v_or_b32_e32 v27, v27, v29
+; LOOP-NEXT:    v_lshlrev_b32_e32 v29, 8, v37
+; LOOP-NEXT:    v_lshlrev_b32_e32 v30, 24, v39
+; LOOP-NEXT:    v_lshlrev_b32_e32 v34, 16, v38
+; LOOP-NEXT:    v_or_b32_e32 v29, v29, v36
+; LOOP-NEXT:    v_or_b32_e32 v30, v30, v34
+; LOOP-NEXT:    v_add_i32_e32 v34, vcc, v0, v4
+; LOOP-NEXT:    v_addc_u32_e32 v35, vcc, v1, v5, vcc
+; LOOP-NEXT:    v_add_i32_e32 v4, vcc, 32, v4
+; LOOP-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; LOOP-NEXT:    v_cmp_gt_u32_e32 vcc, 32, v4
+; LOOP-NEXT:    v_lshlrev_b32_e32 v9, 8, v9
+; LOOP-NEXT:    v_lshlrev_b32_e32 v11, 24, v11
+; LOOP-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
+; LOOP-NEXT:    v_lshlrev_b32_e32 v13, 8, v13
+; LOOP-NEXT:    v_lshlrev_b32_e32 v15, 24, v15
+; LOOP-NEXT:    v_lshlrev_b32_e32 v14, 16, v14
+; LOOP-NEXT:    v_lshlrev_b32_e32 v17, 8, v17
 ; LOOP-NEXT:    s_waitcnt vmcnt(12)
-; LOOP-NEXT:    v_lshlrev_b32_e32 v11, 24, v13
-; LOOP-NEXT:    v_lshlrev_b32_e32 v12, 16, v12
+; LOOP-NEXT:    v_lshlrev_b32_e32 v19, 24, v19
+; LOOP-NEXT:    v_lshlrev_b32_e32 v18, 16, v18
 ; LOOP-NEXT:    s_waitcnt vmcnt(10)
-; LOOP-NEXT:    v_lshlrev_b32_e32 v13, 8, v15
+; LOOP-NEXT:    v_lshlrev_b32_e32 v21, 8, v21
 ; LOOP-NEXT:    s_waitcnt vmcnt(8)
-; LOOP-NEXT:    v_lshlrev_b32_e32 v15, 24, v17
-; LOOP-NEXT:    v_lshlrev_b32_e32 v16, 16, v16
+; LOOP-NEXT:    v_lshlrev_b32_e32 v23, 24, v23
+; LOOP-NEXT:    v_lshlrev_b32_e32 v22, 16, v22
 ; LOOP-NEXT:    s_waitcnt vmcnt(6)
-; LOOP-NEXT:    v_lshlrev_b32_e32 v17, 8, v19
+; LOOP-NEXT:    v_lshlrev_b32_e32 v25, 8, v25
 ; LOOP-NEXT:    s_waitcnt vmcnt(4)
-; LOOP-NEXT:    v_lshlrev_b32_e32 v19, 24, v21
-; LOOP-NEXT:    v_lshlrev_b32_e32 v20, 16, v20
+; LOOP-NEXT:    v_lshlrev_b32_e32 v28, 24, v28
+; LOOP-NEXT:    v_lshlrev_b32_e32 v26, 16, v26
 ; LOOP-NEXT:    s_waitcnt vmcnt(2)
-; LOOP-NEXT:    v_lshlrev_b32_e32 v21, 8, v23
+; LOOP-NEXT:    v_lshlrev_b32_e32 v31, 8, v31
 ; LOOP-NEXT:    s_waitcnt vmcnt(0)
-; LOOP-NEXT:    v_lshlrev_b32_e32 v6, 24, v6
-; LOOP-NEXT:    v_lshlrev_b32_e32 v23, 16, v24
-; LOOP-NEXT:    v_or_b32_e32 v7, v7, v10
-; LOOP-NEXT:    v_or_b32_e32 v10, v11, v12
-; LOOP-NEXT:    v_or_b32_e32 v11, v13, v14
-; LOOP-NEXT:    v_or_b32_e32 v12, v15, v16
-; LOOP-NEXT:    v_or_b32_e32 v13, v17, v18
-; LOOP-NEXT:    v_or_b32_e32 v14, v19, v20
-; LOOP-NEXT:    v_or_b32_e32 v15, v21, v22
-; LOOP-NEXT:    v_or_b32_e32 v6, v6, v23
+; LOOP-NEXT:    v_lshlrev_b32_e32 v33, 24, v33
+; LOOP-NEXT:    v_lshlrev_b32_e32 v32, 16, v32
+; LOOP-NEXT:    v_or_b32_e32 v6, v9, v6
+; LOOP-NEXT:    v_or_b32_e32 v9, v11, v10
+; LOOP-NEXT:    v_or_b32_e32 v7, v13, v7
+; LOOP-NEXT:    v_or_b32_e32 v10, v15, v14
+; LOOP-NEXT:    v_or_b32_e32 v8, v17, v8
+; LOOP-NEXT:    v_or_b32_e32 v11, v19, v18
+; LOOP-NEXT:    v_or_b32_e32 v12, v21, v12
+; LOOP-NEXT:    v_or_b32_e32 v13, v23, v22
+; LOOP-NEXT:    v_or_b32_e32 v14, v25, v16
+; LOOP-NEXT:    v_or_b32_e32 v15, v28, v26
+; LOOP-NEXT:    v_or_b32_e32 v16, v31, v20
+; LOOP-NEXT:    v_or_b32_e32 v17, v33, v32
+; LOOP-NEXT:    v_or_b32_e32 v18, v27, v24
+; LOOP-NEXT:    v_or_b32_e32 v19, v30, v29
+; LOOP-NEXT:    v_or_b32_e32 v6, v9, v6
 ; LOOP-NEXT:    v_or_b32_e32 v7, v10, v7
-; LOOP-NEXT:    v_or_b32_e32 v10, v12, v11
-; LOOP-NEXT:    v_or_b32_e32 v11, v14, v13
-; LOOP-NEXT:    v_or_b32_e32 v6, v6, v15
-; LOOP-NEXT:    v_lshrrev_b32_e32 v12, 16, v7
-; LOOP-NEXT:    v_bfe_u32 v13, v7, 8, 8
-; LOOP-NEXT:    buffer_store_byte v7, v[8:9], s[0:3], 0 addr64
+; LOOP-NEXT:    v_or_b32_e32 v8, v11, v8
+; LOOP-NEXT:    v_or_b32_e32 v9, v13, v12
+; LOOP-NEXT:    v_or_b32_e32 v10, v15, v14
+; LOOP-NEXT:    v_or_b32_e32 v11, v17, v16
+; LOOP-NEXT:    v_lshrrev_b32_e32 v12, 16, v18
+; LOOP-NEXT:    v_bfe_u32 v13, v18, 8, 8
+; LOOP-NEXT:    buffer_store_byte v18, v[34:35], s[0:3], 0 addr64
+; LOOP-NEXT:    v_lshrrev_b32_e32 v14, 24, v18
+; LOOP-NEXT:    v_lshrrev_b32_e32 v15, 16, v19
+; LOOP-NEXT:    v_bfe_u32 v16, v19, 8, 8
+; LOOP-NEXT:    buffer_store_byte v19, v[34:35], s[0:3], 0 addr64 offset:4
+; LOOP-NEXT:    v_lshrrev_b32_e32 v17, 24, v19
+; LOOP-NEXT:    s_waitcnt expcnt(1)
+; LOOP-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
+; LOOP-NEXT:    s_waitcnt expcnt(0)
+; LOOP-NEXT:    v_bfe_u32 v19, v6, 8, 8
+; LOOP-NEXT:    buffer_store_byte v6, v[34:35], s[0:3], 0 addr64 offset:8
+; LOOP-NEXT:    s_waitcnt expcnt(0)
+; LOOP-NEXT:    v_lshrrev_b32_e32 v6, 24, v6
+; LOOP-NEXT:    v_lshrrev_b32_e32 v20, 16, v7
+; LOOP-NEXT:    v_bfe_u32 v21, v7, 8, 8
+; LOOP-NEXT:    buffer_store_byte v7, v[34:35], s[0:3], 0 addr64 offset:12
 ; LOOP-NEXT:    s_waitcnt expcnt(0)
 ; LOOP-NEXT:    v_lshrrev_b32_e32 v7, 24, v7
-; LOOP-NEXT:    v_lshrrev_b32_e32 v14, 16, v10
-; LOOP-NEXT:    v_bfe_u32 v15, v10, 8, 8
-; LOOP-NEXT:    buffer_store_byte v10, v[8:9], s[0:3], 0 addr64 offset:4
+; LOOP-NEXT:    v_lshrrev_b32_e32 v22, 16, v8
+; LOOP-NEXT:    v_bfe_u32 v23, v8, 8, 8
+; LOOP-NEXT:    buffer_store_byte v8, v[34:35], s[0:3], 0 addr64 offset:16
+; LOOP-NEXT:    s_waitcnt expcnt(0)
+; LOOP-NEXT:    v_lshrrev_b32_e32 v8, 24, v8
+; LOOP-NEXT:    v_lshrrev_b32_e32 v24, 16, v9
+; LOOP-NEXT:    v_bfe_u32 v25, v9, 8, 8
+; LOOP-NEXT:    buffer_store_byte v9, v[34:35], s[0:3], 0 addr64 offset:20
+; LOOP-NEXT:    s_waitcnt expcnt(0)
+; LOOP-NEXT:    v_lshrrev_b32_e32 v9, 24, v9
+; LOOP-NEXT:    v_lshrrev_b32_e32 v26, 16, v10
+; LOOP-NEXT:    v_bfe_u32 v27, v10, 8, 8
+; LOOP-NEXT:    buffer_store_byte v10, v[34:35], s[0:3], 0 addr64 offset:24
 ; LOOP-NEXT:    s_waitcnt expcnt(0)
 ; LOOP-NEXT:    v_lshrrev_b32_e32 v10, 24, v10
-; LOOP-NEXT:    v_lshrrev_b32_e32 v16, 16, v11
-; LOOP-NEXT:    v_bfe_u32 v17, v11, 8, 8
-; LOOP-NEXT:    buffer_store_byte v11, v[8:9], s[0:3], 0 addr64 offset:8
+; LOOP-NEXT:    v_lshrrev_b32_e32 v28, 16, v11
+; LOOP-NEXT:    v_bfe_u32 v29, v11, 8, 8
+; LOOP-NEXT:    buffer_store_byte v11, v[34:35], s[0:3], 0 addr64 offset:28
 ; LOOP-NEXT:    s_waitcnt expcnt(0)
 ; LOOP-NEXT:    v_lshrrev_b32_e32 v11, 24, v11
-; LOOP-NEXT:    v_lshrrev_b32_e32 v18, 16, v6
-; LOOP-NEXT:    v_bfe_u32 v19, v6, 8, 8
-; LOOP-NEXT:    buffer_store_byte v6, v[8:9], s[0:3], 0 addr64 offset:12
-; LOOP-NEXT:    s_waitcnt expcnt(0)
-; LOOP-NEXT:    v_lshrrev_b32_e32 v6, 24, v6
-; LOOP-NEXT:    buffer_store_byte v13, v[8:9], s[0:3], 0 addr64 offset:1
-; LOOP-NEXT:    buffer_store_byte v12, v[8:9], s[0:3], 0 addr64 offset:2
-; LOOP-NEXT:    buffer_store_byte v7, v[8:9], s[0:3], 0 addr64 offset:3
-; LOOP-NEXT:    buffer_store_byte v15, v[8:9], s[0:3], 0 addr64 offset:5
-; LOOP-NEXT:    buffer_store_byte v14, v[8:9], s[0:3], 0 addr64 offset:6
-; LOOP-NEXT:    buffer_store_byte v10, v[8:9], s[0:3], 0 addr64 offset:7
-; LOOP-NEXT:    buffer_store_byte v17, v[8:9], s[0:3], 0 addr64 offset:9
-; LOOP-NEXT:    buffer_store_byte v16, v[8:9], s[0:3], 0 addr64 offset:10
-; LOOP-NEXT:    buffer_store_byte v11, v[8:9], s[0:3], 0 addr64 offset:11
-; LOOP-NEXT:    buffer_store_byte v19, v[8:9], s[0:3], 0 addr64 offset:13
-; LOOP-NEXT:    buffer_store_byte v18, v[8:9], s[0:3], 0 addr64 offset:14
-; LOOP-NEXT:    buffer_store_byte v6, v[8:9], s[0:3], 0 addr64 offset:15
+; LOOP-NEXT:    buffer_store_byte v13, v[34:35], s[0:3], 0 addr64 offset:1
+; LOOP-NEXT:    buffer_store_byte v12, v[34:35], s[0:3], 0 addr64 offset:2
+; LOOP-NEXT:    buffer_store_byte v14, v[34:35], s[0:3], 0 addr64 offset:3
+; LOOP-NEXT:    buffer_store_byte v16, v[34:35], s[0:3], 0 addr64 offset:5
+; LOOP-NEXT:    buffer_store_byte v15, v[34:35], s[0:3], 0 addr64 offset:6
+; LOOP-NEXT:    buffer_store_byte v17, v[34:35], s[0:3], 0 addr64 offset:7
+; LOOP-NEXT:    buffer_store_byte v19, v[34:35], s[0:3], 0 addr64 offset:9
+; LOOP-NEXT:    buffer_store_byte v18, v[34:35], s[0:3], 0 addr64 offset:10
+; LOOP-NEXT:    buffer_store_byte v6, v[34:35], s[0:3], 0 addr64 offset:11
+; LOOP-NEXT:    buffer_store_byte v21, v[34:35], s[0:3], 0 addr64 offset:13
+; LOOP-NEXT:    buffer_store_byte v20, v[34:35], s[0:3], 0 addr64 offset:14
+; LOOP-NEXT:    buffer_store_byte v7, v[34:35], s[0:3], 0 addr64 offset:15
+; LOOP-NEXT:    buffer_store_byte v23, v[34:35], s[0:3], 0 addr64 offset:17
+; LOOP-NEXT:    buffer_store_byte v22, v[34:35], s[0:3], 0 addr64 offset:18
+; LOOP-NEXT:    buffer_store_byte v8, v[34:35], s[0:3], 0 addr64 offset:19
+; LOOP-NEXT:    buffer_store_byte v25, v[34:35], s[0:3], 0 addr64 offset:21
+; LOOP-NEXT:    buffer_store_byte v24, v[34:35], s[0:3], 0 addr64 offset:22
+; LOOP-NEXT:    buffer_store_byte v9, v[34:35], s[0:3], 0 addr64 offset:23
+; LOOP-NEXT:    buffer_store_byte v27, v[34:35], s[0:3], 0 addr64 offset:25
+; LOOP-NEXT:    buffer_store_byte v26, v[34:35], s[0:3], 0 addr64 offset:26
+; LOOP-NEXT:    buffer_store_byte v10, v[34:35], s[0:3], 0 addr64 offset:27
+; LOOP-NEXT:    buffer_store_byte v29, v[34:35], s[0:3], 0 addr64 offset:29
+; LOOP-NEXT:    buffer_store_byte v28, v[34:35], s[0:3], 0 addr64 offset:30
+; LOOP-NEXT:    buffer_store_byte v11, v[34:35], s[0:3], 0 addr64 offset:31
 ; LOOP-NEXT:    s_cbranch_vccnz .LBB0_1
 ; LOOP-NEXT:  ; %bb.2: ; %memcpy-split
 ; LOOP-NEXT:    s_mov_b32 s2, 0
 ; LOOP-NEXT:    s_mov_b32 s3, 0xf000
 ; LOOP-NEXT:    s_mov_b64 s[0:1], 0
-; LOOP-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:17
-; LOOP-NEXT:    buffer_load_ubyte v5, v[2:3], s[0:3], 0 addr64 offset:19
-; LOOP-NEXT:    s_waitcnt expcnt(0)
-; LOOP-NEXT:    buffer_load_ubyte v6, v[2:3], s[0:3], 0 addr64 offset:18
-; LOOP-NEXT:    buffer_load_ubyte v2, v[2:3], s[0:3], 0 addr64 offset:16
+; LOOP-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:33
+; LOOP-NEXT:    buffer_load_ubyte v5, v[2:3], s[0:3], 0 addr64 offset:35
+; LOOP-NEXT:    buffer_load_ubyte v6, v[2:3], s[0:3], 0 addr64 offset:34
+; LOOP-NEXT:    buffer_load_ubyte v2, v[2:3], s[0:3], 0 addr64 offset:32
 ; LOOP-NEXT:    s_waitcnt vmcnt(3)
 ; LOOP-NEXT:    v_lshlrev_b32_e32 v3, 8, v4
 ; LOOP-NEXT:    s_waitcnt vmcnt(2)
@@ -124,12 +196,12 @@ define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src)
 ; LOOP-NEXT:    v_or_b32_e32 v2, v3, v2
 ; LOOP-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
 ; LOOP-NEXT:    v_bfe_u32 v4, v2, 8, 8
-; LOOP-NEXT:    buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:16
+; LOOP-NEXT:    buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:32
 ; LOOP-NEXT:    s_waitcnt expcnt(0)
 ; LOOP-NEXT:    v_lshrrev_b32_e32 v2, 24, v2
-; LOOP-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:17
-; LOOP-NEXT:    buffer_store_byte v3, v[0:1], s[0:3], 0 addr64 offset:18
-; LOOP-NEXT:    buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:19
+; LOOP-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:33
+; LOOP-NEXT:    buffer_store_byte v3, v[0:1], s[0:3], 0 addr64 offset:34
+; LOOP-NEXT:    buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:35
 ; LOOP-NEXT:    s_endpgm
 ;
 ; UNROLL-LABEL: memcpy_p1i8:
@@ -212,11 +284,75 @@ define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src)
 ; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:18
 ; UNROLL-NEXT:    s_waitcnt vmcnt(0)
 ; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:18
-; UNROLL-NEXT:    buffer_load_ubyte v2, v[2:3], s[0:3], 0 addr64 offset:19
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:19
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:19
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:20
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:20
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:21
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:21
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:22
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:22
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:23
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:23
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:24
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:24
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:25
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:25
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:26
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:26
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:27
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:27
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:28
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:28
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:29
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:29
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:30
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:30
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:31
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:31
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:32
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:32
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:33
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:33
+; UNROLL-NEXT:    s_waitcnt expcnt(0)
+; UNROLL-NEXT:    buffer_load_ubyte v4, v[2:3], s[0:3], 0 addr64 offset:34
+; UNROLL-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL-NEXT:    buffer_store_byte v4, v[0:1], s[0:3], 0 addr64 offset:34
+; UNROLL-NEXT:    buffer_load_ubyte v2, v[2:3], s[0:3], 0 addr64 offset:35
 ; UNROLL-NEXT:    s_waitcnt vmcnt(0)
-; UNROLL-NEXT:    buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:19
+; UNROLL-NEXT:    buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:35
 ; UNROLL-NEXT:    s_endpgm
-  call void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) %dst, ptr addrspace(1) %src, i32 20, i1 false)
+  call void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) %dst, ptr addrspace(1) %src, i32 36, i1 false)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index 7336543b41cbc8c..236956c1829e77b 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -409,6 +409,1183 @@ define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(ptr addrspa
   ret void
 }
 
+define <2 x ptr addrspace(5)> @addrspacecast_v2p0_to_v2p5(<2 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v2p0_to_v2p5:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(5)>
+  ret <2 x ptr addrspace(5)> %cast
+}
+
+define <3 x ptr addrspace(5)> @addrspacecast_v3p0_to_v3p5(<3 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v3p0_to_v3p5:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(5)>
+  ret <3 x ptr addrspace(5)> %cast
+}
+
+define <4 x ptr addrspace(5)> @addrspacecast_v4p0_to_v4p5(<4 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v4p0_to_v4p5:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
+; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(5)>
+  ret <4 x ptr addrspace(5)> %cast
+}
+
+define <8 x ptr addrspace(5)> @addrspacecast_v8p0_to_v8p5(<8 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v8p0_to_v8p5:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
+; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
+; HSA-NEXT:    v_cndmask_b32_e32 v4, -1, v8, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
+; HSA-NEXT:    v_cndmask_b32_e32 v5, -1, v10, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
+; HSA-NEXT:    v_cndmask_b32_e32 v6, -1, v12, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
+; HSA-NEXT:    v_cndmask_b32_e32 v7, -1, v14, vcc
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(5)>
+  ret <8 x ptr addrspace(5)> %cast
+}
+
+define <16 x ptr addrspace(5)> @addrspacecast_v16p0_to_v16p5(<16 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v16p0_to_v16p5:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    buffer_load_dword v31, off, s[0:3], s32
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; HSA-NEXT:    v_cmp_ne_u64_e64 s[4:5], 0, v[24:25]
+; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; HSA-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[26:27]
+; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; HSA-NEXT:    v_cmp_ne_u64_e64 s[8:9], 0, v[28:29]
+; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
+; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
+; HSA-NEXT:    v_cndmask_b32_e32 v4, -1, v8, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
+; HSA-NEXT:    v_cndmask_b32_e32 v5, -1, v10, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
+; HSA-NEXT:    v_cndmask_b32_e64 v13, -1, v26, s[6:7]
+; HSA-NEXT:    v_cndmask_b32_e32 v6, -1, v12, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
+; HSA-NEXT:    v_cndmask_b32_e64 v12, -1, v24, s[4:5]
+; HSA-NEXT:    v_cndmask_b32_e32 v7, -1, v14, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[16:17]
+; HSA-NEXT:    v_cndmask_b32_e64 v14, -1, v28, s[8:9]
+; HSA-NEXT:    v_cndmask_b32_e32 v8, -1, v16, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[18:19]
+; HSA-NEXT:    v_cndmask_b32_e32 v9, -1, v18, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[20:21]
+; HSA-NEXT:    v_cndmask_b32_e32 v10, -1, v20, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[22:23]
+; HSA-NEXT:    v_cndmask_b32_e32 v11, -1, v22, vcc
+; HSA-NEXT:    s_waitcnt vmcnt(0)
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[30:31]
+; HSA-NEXT:    v_cndmask_b32_e32 v15, -1, v30, vcc
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(5)>
+  ret <16 x ptr addrspace(5)> %cast
+}
+
+define <2 x ptr> @addrspacecast_v2p5_to_v2p0(<2 x ptr addrspace(5)> %ptr) {
+; CI-LABEL: addrspacecast_v2p5_to_v2p0:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v3, s4
+; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; CI-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
+; CI-NEXT:    v_mov_b32_e32 v1, v4
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: addrspacecast_v2p5_to_v2p0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GFX9-NEXT:    v_mov_b32_e32 v3, s5
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v1, v4
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <2 x ptr addrspace(5)> %ptr to <2 x ptr>
+  ret <2 x ptr> %cast
+}
+
+define <3 x ptr> @addrspacecast_v3p5_to_v3p0(<3 x ptr addrspace(5)> %ptr) {
+; CI-LABEL: addrspacecast_v3p5_to_v3p0:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v5, s4
+; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v5, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; CI-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
+; CI-NEXT:    v_mov_b32_e32 v1, v7
+; CI-NEXT:    v_mov_b32_e32 v2, v6
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: addrspacecast_v3p5_to_v3p0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GFX9-NEXT:    v_mov_b32_e32 v5, s5
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v5, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <3 x ptr addrspace(5)> %ptr to <3 x ptr>
+  ret <3 x ptr> %cast
+}
+
+define <4 x ptr> @addrspacecast_v4p5_to_v4p0(<4 x ptr addrspace(5)> %ptr) {
+; CI-LABEL: addrspacecast_v4p5_to_v4p0:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v7, s4
+; CI-NEXT:    v_cndmask_b32_e32 v10, 0, v7, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; CI-NEXT:    v_cndmask_b32_e32 v8, 0, v1, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v9, 0, v7, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v5, 0, v7, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
+; CI-NEXT:    v_cndmask_b32_e32 v6, 0, v3, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
+; CI-NEXT:    v_mov_b32_e32 v1, v10
+; CI-NEXT:    v_mov_b32_e32 v2, v8
+; CI-NEXT:    v_mov_b32_e32 v3, v9
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: addrspacecast_v4p5_to_v4p0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GFX9-NEXT:    v_mov_b32_e32 v7, s5
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v7, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v7, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v7, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v1, v10
+; GFX9-NEXT:    v_mov_b32_e32 v2, v8
+; GFX9-NEXT:    v_mov_b32_e32 v3, v9
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <4 x ptr addrspace(5)> %ptr to <4 x ptr>
+  ret <4 x ptr> %cast
+}
+
+define <8 x ptr> @addrspacecast_v8p5_to_v8p0(<8 x ptr addrspace(5)> %ptr) {
+; CI-LABEL: addrspacecast_v8p5_to_v8p0:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v15, s4
+; CI-NEXT:    v_cndmask_b32_e32 v22, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; CI-NEXT:    v_cndmask_b32_e32 v16, 0, v1, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v17, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; CI-NEXT:    v_cndmask_b32_e32 v18, 0, v2, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v19, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
+; CI-NEXT:    v_cndmask_b32_e32 v20, 0, v3, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v21, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
+; CI-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v9, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
+; CI-NEXT:    v_cndmask_b32_e32 v10, 0, v5, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v11, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v6
+; CI-NEXT:    v_cndmask_b32_e32 v12, 0, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v13, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v7
+; CI-NEXT:    v_cndmask_b32_e32 v14, 0, v7, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
+; CI-NEXT:    v_mov_b32_e32 v1, v22
+; CI-NEXT:    v_mov_b32_e32 v2, v16
+; CI-NEXT:    v_mov_b32_e32 v3, v17
+; CI-NEXT:    v_mov_b32_e32 v4, v18
+; CI-NEXT:    v_mov_b32_e32 v5, v19
+; CI-NEXT:    v_mov_b32_e32 v6, v20
+; CI-NEXT:    v_mov_b32_e32 v7, v21
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: addrspacecast_v8p5_to_v8p0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GFX9-NEXT:    v_mov_b32_e32 v15, s5
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v22, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v16, 0, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v17, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v19, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v20, 0, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v21, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
+; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v6
+; GFX9-NEXT:    v_cndmask_b32_e32 v12, 0, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v13, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v7
+; GFX9-NEXT:    v_cndmask_b32_e32 v14, 0, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v1, v22
+; GFX9-NEXT:    v_mov_b32_e32 v2, v16
+; GFX9-NEXT:    v_mov_b32_e32 v3, v17
+; GFX9-NEXT:    v_mov_b32_e32 v4, v18
+; GFX9-NEXT:    v_mov_b32_e32 v5, v19
+; GFX9-NEXT:    v_mov_b32_e32 v6, v20
+; GFX9-NEXT:    v_mov_b32_e32 v7, v21
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <8 x ptr addrspace(5)> %ptr to <8 x ptr>
+  ret <8 x ptr> %cast
+}
+
+define <16 x ptr> @addrspacecast_v16p5_to_v16p0(<16 x ptr addrspace(5)> %ptr) {
+; CI-LABEL: addrspacecast_v16p5_to_v16p0:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    s_load_dword s4, s[6:7], 0x11
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; CI-NEXT:    v_cmp_ne_u32_e64 s[6:7], -1, v6
+; CI-NEXT:    v_cmp_ne_u32_e64 s[8:9], -1, v7
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v31, s4
+; CI-NEXT:    v_cndmask_b32_e32 v48, 0, v31, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; CI-NEXT:    v_cndmask_b32_e32 v35, 0, v1, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v33, 0, v31, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; CI-NEXT:    v_cndmask_b32_e32 v36, 0, v2, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v49, 0, v31, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
+; CI-NEXT:    v_cndmask_b32_e32 v37, 0, v3, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v34, 0, v31, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
+; CI-NEXT:    v_cmp_ne_u32_e64 s[4:5], -1, v5
+; CI-NEXT:    v_cndmask_b32_e32 v38, 0, v4, vcc
+; CI-NEXT:    v_cndmask_b32_e64 v50, 0, v5, s[4:5]
+; CI-NEXT:    v_cndmask_b32_e64 v39, 0, v6, s[6:7]
+; CI-NEXT:    v_cndmask_b32_e64 v32, 0, v7, s[8:9]
+; CI-NEXT:    v_cmp_ne_u32_e64 s[10:11], -1, v8
+; CI-NEXT:    v_cmp_ne_u32_e64 s[12:13], -1, v9
+; CI-NEXT:    v_cmp_ne_u32_e64 s[14:15], -1, v10
+; CI-NEXT:    v_cmp_ne_u32_e64 s[16:17], -1, v11
+; CI-NEXT:    v_cmp_ne_u32_e64 s[18:19], -1, v12
+; CI-NEXT:    v_cmp_ne_u32_e64 s[20:21], -1, v13
+; CI-NEXT:    v_cmp_ne_u32_e64 s[22:23], -1, v14
+; CI-NEXT:    v_cmp_ne_u32_e64 s[24:25], -1, v15
+; CI-NEXT:    v_cndmask_b32_e64 v16, 0, v8, s[10:11]
+; CI-NEXT:    v_cndmask_b32_e64 v18, 0, v9, s[12:13]
+; CI-NEXT:    v_cndmask_b32_e64 v20, 0, v10, s[14:15]
+; CI-NEXT:    v_cndmask_b32_e64 v22, 0, v11, s[16:17]
+; CI-NEXT:    v_cndmask_b32_e64 v24, 0, v12, s[18:19]
+; CI-NEXT:    v_cndmask_b32_e64 v26, 0, v13, s[20:21]
+; CI-NEXT:    v_cndmask_b32_e64 v28, 0, v14, s[22:23]
+; CI-NEXT:    v_cndmask_b32_e64 v30, 0, v15, s[24:25]
+; CI-NEXT:    v_cndmask_b32_e32 v9, 0, v31, vcc
+; CI-NEXT:    v_cndmask_b32_e64 v11, 0, v31, s[4:5]
+; CI-NEXT:    v_cndmask_b32_e64 v13, 0, v31, s[6:7]
+; CI-NEXT:    v_cndmask_b32_e64 v15, 0, v31, s[8:9]
+; CI-NEXT:    v_cndmask_b32_e64 v17, 0, v31, s[10:11]
+; CI-NEXT:    v_cndmask_b32_e64 v19, 0, v31, s[12:13]
+; CI-NEXT:    v_cndmask_b32_e64 v21, 0, v31, s[14:15]
+; CI-NEXT:    v_cndmask_b32_e64 v23, 0, v31, s[16:17]
+; CI-NEXT:    v_cndmask_b32_e64 v25, 0, v31, s[18:19]
+; CI-NEXT:    v_cndmask_b32_e64 v27, 0, v31, s[20:21]
+; CI-NEXT:    v_cndmask_b32_e64 v29, 0, v31, s[22:23]
+; CI-NEXT:    v_cndmask_b32_e64 v31, 0, v31, s[24:25]
+; CI-NEXT:    v_mov_b32_e32 v1, v48
+; CI-NEXT:    v_mov_b32_e32 v2, v35
+; CI-NEXT:    v_mov_b32_e32 v3, v33
+; CI-NEXT:    v_mov_b32_e32 v4, v36
+; CI-NEXT:    v_mov_b32_e32 v5, v49
+; CI-NEXT:    v_mov_b32_e32 v6, v37
+; CI-NEXT:    v_mov_b32_e32 v7, v34
+; CI-NEXT:    v_mov_b32_e32 v8, v38
+; CI-NEXT:    v_mov_b32_e32 v10, v50
+; CI-NEXT:    v_mov_b32_e32 v12, v39
+; CI-NEXT:    v_mov_b32_e32 v14, v32
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: addrspacecast_v16p5_to_v16p0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[4:5], src_private_base
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GFX9-NEXT:    v_mov_b32_e32 v31, s5
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v48, 0, v31, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v35, 0, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v33, 0, v31, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v36, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v49, 0, v31, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v37, 0, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v34, 0, v31, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], -1, v5
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[6:7], -1, v6
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[8:9], -1, v7
+; GFX9-NEXT:    v_cndmask_b32_e32 v38, 0, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v50, 0, v5, s[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e64 v39, 0, v6, s[6:7]
+; GFX9-NEXT:    v_cndmask_b32_e64 v32, 0, v7, s[8:9]
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[10:11], -1, v8
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[12:13], -1, v9
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[14:15], -1, v10
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[16:17], -1, v11
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[18:19], -1, v12
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[20:21], -1, v13
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[22:23], -1, v14
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[24:25], -1, v15
+; GFX9-NEXT:    v_cndmask_b32_e64 v16, 0, v8, s[10:11]
+; GFX9-NEXT:    v_cndmask_b32_e64 v18, 0, v9, s[12:13]
+; GFX9-NEXT:    v_cndmask_b32_e64 v20, 0, v10, s[14:15]
+; GFX9-NEXT:    v_cndmask_b32_e64 v22, 0, v11, s[16:17]
+; GFX9-NEXT:    v_cndmask_b32_e64 v24, 0, v12, s[18:19]
+; GFX9-NEXT:    v_cndmask_b32_e64 v26, 0, v13, s[20:21]
+; GFX9-NEXT:    v_cndmask_b32_e64 v28, 0, v14, s[22:23]
+; GFX9-NEXT:    v_cndmask_b32_e64 v30, 0, v15, s[24:25]
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v31, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, v31, s[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, v31, s[6:7]
+; GFX9-NEXT:    v_cndmask_b32_e64 v15, 0, v31, s[8:9]
+; GFX9-NEXT:    v_cndmask_b32_e64 v17, 0, v31, s[10:11]
+; GFX9-NEXT:    v_cndmask_b32_e64 v19, 0, v31, s[12:13]
+; GFX9-NEXT:    v_cndmask_b32_e64 v21, 0, v31, s[14:15]
+; GFX9-NEXT:    v_cndmask_b32_e64 v23, 0, v31, s[16:17]
+; GFX9-NEXT:    v_cndmask_b32_e64 v25, 0, v31, s[18:19]
+; GFX9-NEXT:    v_cndmask_b32_e64 v27, 0, v31, s[20:21]
+; GFX9-NEXT:    v_cndmask_b32_e64 v29, 0, v31, s[22:23]
+; GFX9-NEXT:    v_cndmask_b32_e64 v31, 0, v31, s[24:25]
+; GFX9-NEXT:    v_mov_b32_e32 v1, v48
+; GFX9-NEXT:    v_mov_b32_e32 v2, v35
+; GFX9-NEXT:    v_mov_b32_e32 v3, v33
+; GFX9-NEXT:    v_mov_b32_e32 v4, v36
+; GFX9-NEXT:    v_mov_b32_e32 v5, v49
+; GFX9-NEXT:    v_mov_b32_e32 v6, v37
+; GFX9-NEXT:    v_mov_b32_e32 v7, v34
+; GFX9-NEXT:    v_mov_b32_e32 v8, v38
+; GFX9-NEXT:    v_mov_b32_e32 v10, v50
+; GFX9-NEXT:    v_mov_b32_e32 v12, v39
+; GFX9-NEXT:    v_mov_b32_e32 v14, v32
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <16 x ptr addrspace(5)> %ptr to <16 x ptr>
+  ret <16 x ptr> %cast
+}
+
+define <2 x ptr addrspace(3)> @addrspacecast_v2p0_to_v2p3(<2 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v2p0_to_v2p3:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(3)>
+  ret <2 x ptr addrspace(3)> %cast
+}
+
+define <3 x ptr addrspace(3)> @addrspacecast_v3p0_to_v3p3(<3 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v3p0_to_v3p3:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(3)>
+  ret <3 x ptr addrspace(3)> %cast
+}
+
+define <4 x ptr addrspace(3)> @addrspacecast_v4p0_to_v4p3(<4 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v4p0_to_v4p3:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
+; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(3)>
+  ret <4 x ptr addrspace(3)> %cast
+}
+
+define <8 x ptr addrspace(3)> @addrspacecast_v8p0_to_v8p3(<8 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v8p0_to_v8p3:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
+; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
+; HSA-NEXT:    v_cndmask_b32_e32 v4, -1, v8, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
+; HSA-NEXT:    v_cndmask_b32_e32 v5, -1, v10, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
+; HSA-NEXT:    v_cndmask_b32_e32 v6, -1, v12, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
+; HSA-NEXT:    v_cndmask_b32_e32 v7, -1, v14, vcc
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(3)>
+  ret <8 x ptr addrspace(3)> %cast
+}
+
+define <16 x ptr addrspace(3)> @addrspacecast_v16p0_to_v16p3(<16 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v16p0_to_v16p3:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    buffer_load_dword v31, off, s[0:3], s32
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; HSA-NEXT:    v_cmp_ne_u64_e64 s[4:5], 0, v[24:25]
+; HSA-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[2:3]
+; HSA-NEXT:    v_cmp_ne_u64_e64 s[6:7], 0, v[26:27]
+; HSA-NEXT:    v_cndmask_b32_e32 v1, -1, v2, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
+; HSA-NEXT:    v_cmp_ne_u64_e64 s[8:9], 0, v[28:29]
+; HSA-NEXT:    v_cndmask_b32_e32 v2, -1, v4, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[6:7]
+; HSA-NEXT:    v_cndmask_b32_e32 v3, -1, v6, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[8:9]
+; HSA-NEXT:    v_cndmask_b32_e32 v4, -1, v8, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[10:11]
+; HSA-NEXT:    v_cndmask_b32_e32 v5, -1, v10, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[12:13]
+; HSA-NEXT:    v_cndmask_b32_e64 v13, -1, v26, s[6:7]
+; HSA-NEXT:    v_cndmask_b32_e32 v6, -1, v12, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[14:15]
+; HSA-NEXT:    v_cndmask_b32_e64 v12, -1, v24, s[4:5]
+; HSA-NEXT:    v_cndmask_b32_e32 v7, -1, v14, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[16:17]
+; HSA-NEXT:    v_cndmask_b32_e64 v14, -1, v28, s[8:9]
+; HSA-NEXT:    v_cndmask_b32_e32 v8, -1, v16, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[18:19]
+; HSA-NEXT:    v_cndmask_b32_e32 v9, -1, v18, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[20:21]
+; HSA-NEXT:    v_cndmask_b32_e32 v10, -1, v20, vcc
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[22:23]
+; HSA-NEXT:    v_cndmask_b32_e32 v11, -1, v22, vcc
+; HSA-NEXT:    s_waitcnt vmcnt(0)
+; HSA-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[30:31]
+; HSA-NEXT:    v_cndmask_b32_e32 v15, -1, v30, vcc
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(3)>
+  ret <16 x ptr addrspace(3)> %cast
+}
+
+define <2 x ptr> @addrspacecast_v2p3_to_v2p0(<2 x ptr addrspace(3)> %ptr) {
+; CI-LABEL: addrspacecast_v2p3_to_v2p0:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    s_load_dword s4, s[6:7], 0x10
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v3, s4
+; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; CI-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
+; CI-NEXT:    v_mov_b32_e32 v1, v4
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: addrspacecast_v2p3_to_v2p0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[4:5], src_shared_base
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GFX9-NEXT:    v_mov_b32_e32 v3, s5
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v3, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, 0, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v1, v4
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <2 x ptr addrspace(3)> %ptr to <2 x ptr>
+  ret <2 x ptr> %cast
+}
+
+define <3 x ptr> @addrspacecast_v3p3_to_v3p0(<3 x ptr addrspace(3)> %ptr) {
+; CI-LABEL: addrspacecast_v3p3_to_v3p0:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    s_load_dword s4, s[6:7], 0x10
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v5, s4
+; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v5, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; CI-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
+; CI-NEXT:    v_mov_b32_e32 v1, v7
+; CI-NEXT:    v_mov_b32_e32 v2, v6
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: addrspacecast_v3p3_to_v3p0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[4:5], src_shared_base
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GFX9-NEXT:    v_mov_b32_e32 v5, s5
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v5, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, 0, v5, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v5, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v1, v7
+; GFX9-NEXT:    v_mov_b32_e32 v2, v6
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <3 x ptr addrspace(3)> %ptr to <3 x ptr>
+  ret <3 x ptr> %cast
+}
+
+define <4 x ptr> @addrspacecast_v4p3_to_v4p0(<4 x ptr addrspace(3)> %ptr) {
+; CI-LABEL: addrspacecast_v4p3_to_v4p0:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    s_load_dword s4, s[6:7], 0x10
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v7, s4
+; CI-NEXT:    v_cndmask_b32_e32 v10, 0, v7, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; CI-NEXT:    v_cndmask_b32_e32 v8, 0, v1, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v9, 0, v7, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; CI-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v5, 0, v7, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
+; CI-NEXT:    v_cndmask_b32_e32 v6, 0, v3, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
+; CI-NEXT:    v_mov_b32_e32 v1, v10
+; CI-NEXT:    v_mov_b32_e32 v2, v8
+; CI-NEXT:    v_mov_b32_e32 v3, v9
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: addrspacecast_v4p3_to_v4p0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[4:5], src_shared_base
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GFX9-NEXT:    v_mov_b32_e32 v7, s5
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v7, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v7, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, 0, v7, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v6, 0, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v7, 0, v7, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v1, v10
+; GFX9-NEXT:    v_mov_b32_e32 v2, v8
+; GFX9-NEXT:    v_mov_b32_e32 v3, v9
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <4 x ptr addrspace(3)> %ptr to <4 x ptr>
+  ret <4 x ptr> %cast
+}
+
+define <8 x ptr> @addrspacecast_v8p3_to_v8p0(<8 x ptr addrspace(3)> %ptr) {
+; CI-LABEL: addrspacecast_v8p3_to_v8p0:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    s_load_dword s4, s[6:7], 0x10
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v15, s4
+; CI-NEXT:    v_cndmask_b32_e32 v22, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; CI-NEXT:    v_cndmask_b32_e32 v16, 0, v1, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v17, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; CI-NEXT:    v_cndmask_b32_e32 v18, 0, v2, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v19, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
+; CI-NEXT:    v_cndmask_b32_e32 v20, 0, v3, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v21, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
+; CI-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v9, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
+; CI-NEXT:    v_cndmask_b32_e32 v10, 0, v5, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v11, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v6
+; CI-NEXT:    v_cndmask_b32_e32 v12, 0, v6, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v13, 0, v15, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v7
+; CI-NEXT:    v_cndmask_b32_e32 v14, 0, v7, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
+; CI-NEXT:    v_mov_b32_e32 v1, v22
+; CI-NEXT:    v_mov_b32_e32 v2, v16
+; CI-NEXT:    v_mov_b32_e32 v3, v17
+; CI-NEXT:    v_mov_b32_e32 v4, v18
+; CI-NEXT:    v_mov_b32_e32 v5, v19
+; CI-NEXT:    v_mov_b32_e32 v6, v20
+; CI-NEXT:    v_mov_b32_e32 v7, v21
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: addrspacecast_v8p3_to_v8p0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[4:5], src_shared_base
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GFX9-NEXT:    v_mov_b32_e32 v15, s5
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v22, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v16, 0, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v17, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v18, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v19, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v20, 0, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v21, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, 0, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v5
+; GFX9-NEXT:    v_cndmask_b32_e32 v10, 0, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v11, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v6
+; GFX9-NEXT:    v_cndmask_b32_e32 v12, 0, v6, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v13, 0, v15, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v7
+; GFX9-NEXT:    v_cndmask_b32_e32 v14, 0, v7, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v15, 0, v15, vcc
+; GFX9-NEXT:    v_mov_b32_e32 v1, v22
+; GFX9-NEXT:    v_mov_b32_e32 v2, v16
+; GFX9-NEXT:    v_mov_b32_e32 v3, v17
+; GFX9-NEXT:    v_mov_b32_e32 v4, v18
+; GFX9-NEXT:    v_mov_b32_e32 v5, v19
+; GFX9-NEXT:    v_mov_b32_e32 v6, v20
+; GFX9-NEXT:    v_mov_b32_e32 v7, v21
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <8 x ptr addrspace(3)> %ptr to <8 x ptr>
+  ret <8 x ptr> %cast
+}
+
+define <16 x ptr> @addrspacecast_v16p3_to_v16p0(<16 x ptr addrspace(3)> %ptr) {
+; CI-LABEL: addrspacecast_v16p3_to_v16p0:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT:    s_load_dword s4, s[6:7], 0x10
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; CI-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; CI-NEXT:    v_cmp_ne_u32_e64 s[6:7], -1, v6
+; CI-NEXT:    v_cmp_ne_u32_e64 s[8:9], -1, v7
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v31, s4
+; CI-NEXT:    v_cndmask_b32_e32 v48, 0, v31, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; CI-NEXT:    v_cndmask_b32_e32 v35, 0, v1, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v33, 0, v31, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; CI-NEXT:    v_cndmask_b32_e32 v36, 0, v2, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v49, 0, v31, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
+; CI-NEXT:    v_cndmask_b32_e32 v37, 0, v3, vcc
+; CI-NEXT:    v_cndmask_b32_e32 v34, 0, v31, vcc
+; CI-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
+; CI-NEXT:    v_cmp_ne_u32_e64 s[4:5], -1, v5
+; CI-NEXT:    v_cndmask_b32_e32 v38, 0, v4, vcc
+; CI-NEXT:    v_cndmask_b32_e64 v50, 0, v5, s[4:5]
+; CI-NEXT:    v_cndmask_b32_e64 v39, 0, v6, s[6:7]
+; CI-NEXT:    v_cndmask_b32_e64 v32, 0, v7, s[8:9]
+; CI-NEXT:    v_cmp_ne_u32_e64 s[10:11], -1, v8
+; CI-NEXT:    v_cmp_ne_u32_e64 s[12:13], -1, v9
+; CI-NEXT:    v_cmp_ne_u32_e64 s[14:15], -1, v10
+; CI-NEXT:    v_cmp_ne_u32_e64 s[16:17], -1, v11
+; CI-NEXT:    v_cmp_ne_u32_e64 s[18:19], -1, v12
+; CI-NEXT:    v_cmp_ne_u32_e64 s[20:21], -1, v13
+; CI-NEXT:    v_cmp_ne_u32_e64 s[22:23], -1, v14
+; CI-NEXT:    v_cmp_ne_u32_e64 s[24:25], -1, v15
+; CI-NEXT:    v_cndmask_b32_e64 v16, 0, v8, s[10:11]
+; CI-NEXT:    v_cndmask_b32_e64 v18, 0, v9, s[12:13]
+; CI-NEXT:    v_cndmask_b32_e64 v20, 0, v10, s[14:15]
+; CI-NEXT:    v_cndmask_b32_e64 v22, 0, v11, s[16:17]
+; CI-NEXT:    v_cndmask_b32_e64 v24, 0, v12, s[18:19]
+; CI-NEXT:    v_cndmask_b32_e64 v26, 0, v13, s[20:21]
+; CI-NEXT:    v_cndmask_b32_e64 v28, 0, v14, s[22:23]
+; CI-NEXT:    v_cndmask_b32_e64 v30, 0, v15, s[24:25]
+; CI-NEXT:    v_cndmask_b32_e32 v9, 0, v31, vcc
+; CI-NEXT:    v_cndmask_b32_e64 v11, 0, v31, s[4:5]
+; CI-NEXT:    v_cndmask_b32_e64 v13, 0, v31, s[6:7]
+; CI-NEXT:    v_cndmask_b32_e64 v15, 0, v31, s[8:9]
+; CI-NEXT:    v_cndmask_b32_e64 v17, 0, v31, s[10:11]
+; CI-NEXT:    v_cndmask_b32_e64 v19, 0, v31, s[12:13]
+; CI-NEXT:    v_cndmask_b32_e64 v21, 0, v31, s[14:15]
+; CI-NEXT:    v_cndmask_b32_e64 v23, 0, v31, s[16:17]
+; CI-NEXT:    v_cndmask_b32_e64 v25, 0, v31, s[18:19]
+; CI-NEXT:    v_cndmask_b32_e64 v27, 0, v31, s[20:21]
+; CI-NEXT:    v_cndmask_b32_e64 v29, 0, v31, s[22:23]
+; CI-NEXT:    v_cndmask_b32_e64 v31, 0, v31, s[24:25]
+; CI-NEXT:    v_mov_b32_e32 v1, v48
+; CI-NEXT:    v_mov_b32_e32 v2, v35
+; CI-NEXT:    v_mov_b32_e32 v3, v33
+; CI-NEXT:    v_mov_b32_e32 v4, v36
+; CI-NEXT:    v_mov_b32_e32 v5, v49
+; CI-NEXT:    v_mov_b32_e32 v6, v37
+; CI-NEXT:    v_mov_b32_e32 v7, v34
+; CI-NEXT:    v_mov_b32_e32 v8, v38
+; CI-NEXT:    v_mov_b32_e32 v10, v50
+; CI-NEXT:    v_mov_b32_e32 v12, v39
+; CI-NEXT:    v_mov_b32_e32 v14, v32
+; CI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: addrspacecast_v16p3_to_v16p0:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_mov_b64 s[4:5], src_shared_base
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v0
+; GFX9-NEXT:    v_mov_b32_e32 v31, s5
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v48, 0, v31, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v35, 0, v1, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v33, 0, v31, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v36, 0, v2, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v49, 0, v31, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v37, 0, v3, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v34, 0, v31, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, -1, v4
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[4:5], -1, v5
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[6:7], -1, v6
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[8:9], -1, v7
+; GFX9-NEXT:    v_cndmask_b32_e32 v38, 0, v4, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v50, 0, v5, s[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e64 v39, 0, v6, s[6:7]
+; GFX9-NEXT:    v_cndmask_b32_e64 v32, 0, v7, s[8:9]
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[10:11], -1, v8
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[12:13], -1, v9
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[14:15], -1, v10
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[16:17], -1, v11
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[18:19], -1, v12
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[20:21], -1, v13
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[22:23], -1, v14
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[24:25], -1, v15
+; GFX9-NEXT:    v_cndmask_b32_e64 v16, 0, v8, s[10:11]
+; GFX9-NEXT:    v_cndmask_b32_e64 v18, 0, v9, s[12:13]
+; GFX9-NEXT:    v_cndmask_b32_e64 v20, 0, v10, s[14:15]
+; GFX9-NEXT:    v_cndmask_b32_e64 v22, 0, v11, s[16:17]
+; GFX9-NEXT:    v_cndmask_b32_e64 v24, 0, v12, s[18:19]
+; GFX9-NEXT:    v_cndmask_b32_e64 v26, 0, v13, s[20:21]
+; GFX9-NEXT:    v_cndmask_b32_e64 v28, 0, v14, s[22:23]
+; GFX9-NEXT:    v_cndmask_b32_e64 v30, 0, v15, s[24:25]
+; GFX9-NEXT:    v_cndmask_b32_e32 v9, 0, v31, vcc
+; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, v31, s[4:5]
+; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, v31, s[6:7]
+; GFX9-NEXT:    v_cndmask_b32_e64 v15, 0, v31, s[8:9]
+; GFX9-NEXT:    v_cndmask_b32_e64 v17, 0, v31, s[10:11]
+; GFX9-NEXT:    v_cndmask_b32_e64 v19, 0, v31, s[12:13]
+; GFX9-NEXT:    v_cndmask_b32_e64 v21, 0, v31, s[14:15]
+; GFX9-NEXT:    v_cndmask_b32_e64 v23, 0, v31, s[16:17]
+; GFX9-NEXT:    v_cndmask_b32_e64 v25, 0, v31, s[18:19]
+; GFX9-NEXT:    v_cndmask_b32_e64 v27, 0, v31, s[20:21]
+; GFX9-NEXT:    v_cndmask_b32_e64 v29, 0, v31, s[22:23]
+; GFX9-NEXT:    v_cndmask_b32_e64 v31, 0, v31, s[24:25]
+; GFX9-NEXT:    v_mov_b32_e32 v1, v48
+; GFX9-NEXT:    v_mov_b32_e32 v2, v35
+; GFX9-NEXT:    v_mov_b32_e32 v3, v33
+; GFX9-NEXT:    v_mov_b32_e32 v4, v36
+; GFX9-NEXT:    v_mov_b32_e32 v5, v49
+; GFX9-NEXT:    v_mov_b32_e32 v6, v37
+; GFX9-NEXT:    v_mov_b32_e32 v7, v34
+; GFX9-NEXT:    v_mov_b32_e32 v8, v38
+; GFX9-NEXT:    v_mov_b32_e32 v10, v50
+; GFX9-NEXT:    v_mov_b32_e32 v12, v39
+; GFX9-NEXT:    v_mov_b32_e32 v14, v32
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <16 x ptr addrspace(3)> %ptr to <16 x ptr>
+  ret <16 x ptr> %cast
+}
+
+define <2 x ptr addrspace(1)> @addrspacecast_v2p0_to_v2p1(<2 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v2p0_to_v2p1:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(1)>
+  ret <2 x ptr addrspace(1)> %cast
+}
+
+define <3 x ptr addrspace(1)> @addrspacecast_v3p0_to_v3p1(<3 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v3p0_to_v3p1:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(1)>
+  ret <3 x ptr addrspace(1)> %cast
+}
+
+define <4 x ptr addrspace(1)> @addrspacecast_v4p0_to_v4p1(<4 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v4p0_to_v4p1:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(1)>
+  ret <4 x ptr addrspace(1)> %cast
+}
+
+define <8 x ptr addrspace(1)> @addrspacecast_v8p0_to_v8p1(<8 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v8p0_to_v8p1:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(1)>
+  ret <8 x ptr addrspace(1)> %cast
+}
+
+define <16 x ptr addrspace(1)> @addrspacecast_v16p0_to_v16p1(<16 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v16p0_to_v16p1:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    buffer_load_dword v31, off, s[0:3], s32
+; HSA-NEXT:    s_waitcnt vmcnt(0)
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(1)>
+  ret <16 x ptr addrspace(1)> %cast
+}
+
+define <2 x ptr> @addrspacecast_v2p1_to_v2p0(<2 x ptr addrspace(1)> %ptr) {
+; HSA-LABEL: addrspacecast_v2p1_to_v2p0:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <2 x ptr addrspace(1)> %ptr to <2 x ptr>
+  ret <2 x ptr> %cast
+}
+
+define <1 x ptr> @addrspacecast_v1p1_to_v1p0(<1 x ptr addrspace(1)> %ptr) {
+; HSA-LABEL: addrspacecast_v1p1_to_v1p0:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <1 x ptr addrspace(1)> %ptr to <1 x ptr>
+  ret <1 x ptr> %cast
+}
+
+define <4 x ptr> @addrspacecast_v4p1_to_v4p0(<4 x ptr addrspace(1)> %ptr) {
+; HSA-LABEL: addrspacecast_v4p1_to_v4p0:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <4 x ptr addrspace(1)> %ptr to <4 x ptr>
+  ret <4 x ptr> %cast
+}
+
+define <8 x ptr> @addrspacecast_v8p1_to_v8p0(<8 x ptr addrspace(1)> %ptr) {
+; HSA-LABEL: addrspacecast_v8p1_to_v8p0:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <8 x ptr addrspace(1)> %ptr to <8 x ptr>
+  ret <8 x ptr> %cast
+}
+
+define <16 x ptr> @addrspacecast_v16p1_to_v16p0(<16 x ptr addrspace(1)> %ptr) {
+; HSA-LABEL: addrspacecast_v16p1_to_v16p0:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    buffer_load_dword v31, off, s[0:3], s32
+; HSA-NEXT:    s_waitcnt vmcnt(0)
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <16 x ptr addrspace(1)> %ptr to <16 x ptr>
+  ret <16 x ptr> %cast
+}
+
+define <2 x ptr addrspace(6)> @addrspacecast_v2p0_to_v2p6(<2 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v2p0_to_v2p6:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_mov_b32_e32 v1, v2
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <2 x ptr> %ptr to <2 x ptr addrspace(6)>
+  ret <2 x ptr addrspace(6)> %cast
+}
+
+define <3 x ptr addrspace(6)> @addrspacecast_v3p0_to_v3p6(<3 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v3p0_to_v3p6:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_mov_b32_e32 v1, v2
+; HSA-NEXT:    v_mov_b32_e32 v2, v4
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <3 x ptr> %ptr to <3 x ptr addrspace(6)>
+  ret <3 x ptr addrspace(6)> %cast
+}
+
+define <4 x ptr addrspace(6)> @addrspacecast_v4p0_to_v4p6(<4 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v4p0_to_v4p6:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_mov_b32_e32 v3, v6
+; HSA-NEXT:    v_mov_b32_e32 v1, v2
+; HSA-NEXT:    v_mov_b32_e32 v2, v4
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <4 x ptr> %ptr to <4 x ptr addrspace(6)>
+  ret <4 x ptr addrspace(6)> %cast
+}
+
+define <8 x ptr addrspace(6)> @addrspacecast_v8p0_to_v8p6(<8 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v8p0_to_v8p6:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_mov_b32_e32 v7, v14
+; HSA-NEXT:    v_mov_b32_e32 v5, v10
+; HSA-NEXT:    v_mov_b32_e32 v3, v6
+; HSA-NEXT:    v_mov_b32_e32 v1, v2
+; HSA-NEXT:    v_mov_b32_e32 v2, v4
+; HSA-NEXT:    v_mov_b32_e32 v4, v8
+; HSA-NEXT:    v_mov_b32_e32 v6, v12
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <8 x ptr> %ptr to <8 x ptr addrspace(6)>
+  ret <8 x ptr addrspace(6)> %cast
+}
+
+define <16 x ptr addrspace(6)> @addrspacecast_v16p0_to_v16p6(<16 x ptr> %ptr) {
+; HSA-LABEL: addrspacecast_v16p0_to_v16p6:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_mov_b32_e32 v15, v30
+; HSA-NEXT:    v_mov_b32_e32 v13, v26
+; HSA-NEXT:    v_mov_b32_e32 v11, v22
+; HSA-NEXT:    v_mov_b32_e32 v9, v18
+; HSA-NEXT:    v_mov_b32_e32 v7, v14
+; HSA-NEXT:    v_mov_b32_e32 v5, v10
+; HSA-NEXT:    v_mov_b32_e32 v3, v6
+; HSA-NEXT:    v_mov_b32_e32 v1, v2
+; HSA-NEXT:    v_mov_b32_e32 v2, v4
+; HSA-NEXT:    v_mov_b32_e32 v4, v8
+; HSA-NEXT:    v_mov_b32_e32 v6, v12
+; HSA-NEXT:    v_mov_b32_e32 v8, v16
+; HSA-NEXT:    v_mov_b32_e32 v10, v20
+; HSA-NEXT:    v_mov_b32_e32 v12, v24
+; HSA-NEXT:    v_mov_b32_e32 v14, v28
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <16 x ptr> %ptr to <16 x ptr addrspace(6)>
+  ret <16 x ptr addrspace(6)> %cast
+}
+
+define <2 x ptr> @addrspacecast_v2p6_to_v2p0(<2 x ptr addrspace(6)> %ptr) {
+; HSA-LABEL: addrspacecast_v2p6_to_v2p0:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_mov_b32_e32 v2, v1
+; HSA-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-NEXT:    v_mov_b32_e32 v3, 0
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <2 x ptr addrspace(6)> %ptr to <2 x ptr>
+  ret <2 x ptr> %cast
+}
+
+define <1 x ptr> @addrspacecast_v1p6_to_v1p0(<1 x ptr addrspace(6)> %ptr) {
+; HSA-LABEL: addrspacecast_v1p6_to_v1p0:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <1 x ptr addrspace(6)> %ptr to <1 x ptr>
+  ret <1 x ptr> %cast
+}
+
+define <4 x ptr> @addrspacecast_v4p6_to_v4p0(<4 x ptr addrspace(6)> %ptr) {
+; HSA-LABEL: addrspacecast_v4p6_to_v4p0:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_mov_b32_e32 v6, v3
+; HSA-NEXT:    v_mov_b32_e32 v4, v2
+; HSA-NEXT:    v_mov_b32_e32 v2, v1
+; HSA-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-NEXT:    v_mov_b32_e32 v3, 0
+; HSA-NEXT:    v_mov_b32_e32 v5, 0
+; HSA-NEXT:    v_mov_b32_e32 v7, 0
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <4 x ptr addrspace(6)> %ptr to <4 x ptr>
+  ret <4 x ptr> %cast
+}
+
+define <8 x ptr> @addrspacecast_v8p6_to_v8p0(<8 x ptr addrspace(6)> %ptr) {
+; HSA-LABEL: addrspacecast_v8p6_to_v8p0:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_mov_b32_e32 v14, v7
+; HSA-NEXT:    v_mov_b32_e32 v12, v6
+; HSA-NEXT:    v_mov_b32_e32 v10, v5
+; HSA-NEXT:    v_mov_b32_e32 v8, v4
+; HSA-NEXT:    v_mov_b32_e32 v6, v3
+; HSA-NEXT:    v_mov_b32_e32 v4, v2
+; HSA-NEXT:    v_mov_b32_e32 v2, v1
+; HSA-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-NEXT:    v_mov_b32_e32 v3, 0
+; HSA-NEXT:    v_mov_b32_e32 v5, 0
+; HSA-NEXT:    v_mov_b32_e32 v7, 0
+; HSA-NEXT:    v_mov_b32_e32 v9, 0
+; HSA-NEXT:    v_mov_b32_e32 v11, 0
+; HSA-NEXT:    v_mov_b32_e32 v13, 0
+; HSA-NEXT:    v_mov_b32_e32 v15, 0
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <8 x ptr addrspace(6)> %ptr to <8 x ptr>
+  ret <8 x ptr> %cast
+}
+
+define <16 x ptr> @addrspacecast_v16p6_to_v16p0(<16 x ptr addrspace(6)> %ptr) {
+; HSA-LABEL: addrspacecast_v16p6_to_v16p0:
+; HSA:       ; %bb.0:
+; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; HSA-NEXT:    v_mov_b32_e32 v30, v15
+; HSA-NEXT:    v_mov_b32_e32 v28, v14
+; HSA-NEXT:    v_mov_b32_e32 v26, v13
+; HSA-NEXT:    v_mov_b32_e32 v24, v12
+; HSA-NEXT:    v_mov_b32_e32 v22, v11
+; HSA-NEXT:    v_mov_b32_e32 v20, v10
+; HSA-NEXT:    v_mov_b32_e32 v18, v9
+; HSA-NEXT:    v_mov_b32_e32 v16, v8
+; HSA-NEXT:    v_mov_b32_e32 v14, v7
+; HSA-NEXT:    v_mov_b32_e32 v12, v6
+; HSA-NEXT:    v_mov_b32_e32 v10, v5
+; HSA-NEXT:    v_mov_b32_e32 v8, v4
+; HSA-NEXT:    v_mov_b32_e32 v6, v3
+; HSA-NEXT:    v_mov_b32_e32 v4, v2
+; HSA-NEXT:    v_mov_b32_e32 v2, v1
+; HSA-NEXT:    v_mov_b32_e32 v1, 0
+; HSA-NEXT:    v_mov_b32_e32 v3, 0
+; HSA-NEXT:    v_mov_b32_e32 v5, 0
+; HSA-NEXT:    v_mov_b32_e32 v7, 0
+; HSA-NEXT:    v_mov_b32_e32 v9, 0
+; HSA-NEXT:    v_mov_b32_e32 v11, 0
+; HSA-NEXT:    v_mov_b32_e32 v13, 0
+; HSA-NEXT:    v_mov_b32_e32 v15, 0
+; HSA-NEXT:    v_mov_b32_e32 v17, 0
+; HSA-NEXT:    v_mov_b32_e32 v19, 0
+; HSA-NEXT:    v_mov_b32_e32 v21, 0
+; HSA-NEXT:    v_mov_b32_e32 v23, 0
+; HSA-NEXT:    v_mov_b32_e32 v25, 0
+; HSA-NEXT:    v_mov_b32_e32 v27, 0
+; HSA-NEXT:    v_mov_b32_e32 v29, 0
+; HSA-NEXT:    v_mov_b32_e32 v31, 0
+; HSA-NEXT:    s_setpc_b64 s[30:31]
+  %cast = addrspacecast <16 x ptr addrspace(6)> %ptr to <16 x ptr>
+  ret <16 x ptr> %cast
+}
+
 declare void @llvm.amdgcn.s.barrier() #1
 declare i32 @llvm.amdgcn.workitem.id.x() #2
 
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 862543299239717..055e9850de3d68a 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -38,24 +38,19 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr30_sgpr31 = S_MOV_B64 0
   ; GFX90A-NEXT:   renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
-  ; GFX90A-NEXT:   $vgpr22 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr10 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr24 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr18 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   $vgpr20 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_CBRANCH_VCCNZ %bb.59, implicit $vcc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   successors: %bb.3(0x80000000)
-  ; GFX90A-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr22, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3, $vgpr10, $vgpr24, $vgpr18, $vgpr20
+  ; GFX90A-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $sgpr23 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr19 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr21 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr23 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr25 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
+  ; GFX90A-NEXT:   renamable $vgpr19 = IMPLICIT_DEF implicit-def $vgpr18
+  ; GFX90A-NEXT:   renamable $vgpr21 = IMPLICIT_DEF implicit-def $vgpr20
+  ; GFX90A-NEXT:   renamable $vgpr23 = IMPLICIT_DEF implicit-def $vgpr22
+  ; GFX90A-NEXT:   renamable $vgpr25 = IMPLICIT_DEF implicit-def $vgpr24
   ; GFX90A-NEXT:   renamable $sgpr28_sgpr29 = S_MOV_B64 0
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.3.Flow17:
@@ -111,8 +106,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.6.Flow20:
@@ -395,8 +390,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr30_sgpr31 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.37, implicit $exec
@@ -434,8 +429,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.39, implicit $exec
@@ -484,8 +479,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr38_sgpr39 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.41, implicit $exec
@@ -535,8 +530,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr40_sgpr41 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.47, implicit $exec
@@ -589,8 +584,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $sgpr44_sgpr45 = S_MOV_B64 0
   ; GFX90A-NEXT: {{  $}}
@@ -643,8 +638,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr16_sgpr17 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.43, implicit $exec
@@ -689,8 +684,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_BRANCH %bb.45
   ; GFX90A-NEXT: {{  $}}
@@ -719,8 +714,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_BRANCH %bb.46
   ; GFX90A-NEXT: {{  $}}
@@ -748,8 +743,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   S_BRANCH %bb.62
   ; GFX90A-NEXT: {{  $}}
@@ -773,8 +768,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   renamable $sgpr15 = IMPLICIT_DEF
   ; GFX90A-NEXT:   $sgpr58_sgpr59 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.53, implicit $exec
@@ -880,8 +875,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
   ; GFX90A-NEXT:   renamable $vgpr52 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr16 = IMPLICIT_DEF
   ; GFX90A-NEXT:   renamable $vgpr53 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF
-  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF
+  ; GFX90A-NEXT:   renamable $vgpr13 = IMPLICIT_DEF implicit-def $vgpr12
+  ; GFX90A-NEXT:   renamable $vgpr11 = IMPLICIT_DEF implicit-def $vgpr10
   ; GFX90A-NEXT:   $sgpr50_sgpr51 = S_AND_SAVEEXEC_B64 $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX90A-NEXT:   S_CBRANCH_EXECNZ %bb.57, implicit $exec
   ; GFX90A-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-nophis.mir b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-nophis.mir
index 2cb84c7ef4637d5..072cc3a60a60cae 100644
--- a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-nophis.mir
+++ b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-nophis.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=early-tailduplication -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=early-tailduplication -o - %s | FileCheck %s
 
  # There are no phis in this testcase. Early tail duplication introduces them,
  # so the NoPHIs property needs to be cleared to avoid verifier errors
diff --git a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir
index 41c6906b3c85ad2..8132fa4df89eee4 100644
--- a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir
+++ b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=early-tailduplication -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=early-tailduplication -o - %s | FileCheck %s
 
 # Early tail duplication should not merge bb.6 into bb.5, adding a
 # non-terminator (S_SLEEP) after the terminator S_MOV_B32_term.
diff --git a/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir b/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir
new file mode 100644
index 000000000000000..8065e2cfc004322
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-omod-crash.mir
@@ -0,0 +1,50 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands %s -verify-machineinstrs -o - | FileCheck %s -check-prefix=GFX9
+
+# When V_ADD_F32 is replaced with an output modifier on V_RSQ_F32, check that
+# the kill flag is cleared on the use of %4 in V_MUL_F32.
+---
+name: main
+tracksRegLiveness: true
+machineFunctionInfo:
+  mode:
+    ieee: false
+    fp32-input-denormals: false
+    fp32-output-denormals: false
+body: |
+  ; GFX9-LABEL: name: main
+  ; GFX9: bb.0:
+  ; GFX9-NEXT:   successors: %bb.1(0x80000000)
+  ; GFX9-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GFX9-NEXT:   [[DEF1:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+  ; GFX9-NEXT:   [[V_RSQ_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef [[DEF]], 0, 1, implicit $mode, implicit $exec
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT: bb.1:
+  ; GFX9-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT:   [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GFX9-NEXT:   [[V_MUL_F32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_F32_e64 0, killed undef [[DEF2]], 0, [[V_RSQ_F32_e64_]], 0, 0, implicit $mode, implicit $exec
+  ; GFX9-NEXT:   SI_LOOP undef [[DEF1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+  ; GFX9-NEXT:   S_BRANCH %bb.2
+  ; GFX9-NEXT: {{  $}}
+  ; GFX9-NEXT: bb.2:
+  ; GFX9-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9
+
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64 = IMPLICIT_DEF
+    %2:vgpr_32 = nofpexcept V_RSQ_F32_e64 0, undef %0, 0, 0, implicit $mode, implicit $exec
+
+  bb.1:
+    %3:vgpr_32 = IMPLICIT_DEF
+    %4:vgpr_32 = nsz reassoc nofpexcept V_ADD_F32_e64 0, undef %2, 0, undef %2, 0, 0, implicit $mode, implicit $exec
+    %5:vgpr_32 = V_MUL_F32_e64 0, killed undef %3, 0, killed %4, 0, 0, implicit $mode, implicit $exec
+    SI_LOOP undef %1, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
index 6603f2ef7adef71..7421a2e10c3b572 100644
--- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
@@ -32,7 +32,7 @@ body:             |
   ; CHECK-NEXT:   dead undef [[DEF2:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
   ; CHECK-NEXT:   SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5)
-  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF
+  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
   ; CHECK-NEXT:   $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.6, implicit $exec
@@ -82,7 +82,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5)
   ; CHECK-NEXT:   dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
-  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24
+  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.6
   ; CHECK-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
index fa95f4c13417429..8ae6c279558961c 100644
--- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir
@@ -30,7 +30,7 @@ body:             |
   ; CHECK-NEXT:   dead renamable $sgpr5 = IMPLICIT_DEF
   ; CHECK-NEXT:   dead undef [[DEF3:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF
   ; CHECK-NEXT:   dead renamable $sgpr5 = IMPLICIT_DEF
-  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF
+  ; CHECK-NEXT:   renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25
   ; CHECK-NEXT:   renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4)
   ; CHECK-NEXT:   $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5
   ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.5, implicit $exec
@@ -78,7 +78,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX8_IMM undef renamable $sgpr4_sgpr5, 32, 0 :: (invariant load (s256), addrspace 4)
   ; CHECK-NEXT:   dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
-  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24
+  ; CHECK-NEXT:   renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24
   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.6, implicit undef $vcc
   ; CHECK-NEXT:   S_BRANCH %bb.5
   ; CHECK-NEXT: {{  $}}
diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir b/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir
new file mode 100644
index 000000000000000..786ce402038369d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir
@@ -0,0 +1,55 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -start-before=greedy,2 -stop-after=tailduplication -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0, $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr4_vgpr5
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr3, renamable $vgpr4, renamable $vgpr5, killed renamable $vgpr2, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $vgpr2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr3_vgpr4_vgpr5 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr3, renamable $vgpr4, renamable $vgpr5, killed renamable $vgpr2, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0, $vgpr2
+
+    %2:vgpr_32 = COPY $vgpr2
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll b/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll
new file mode 100644
index 000000000000000..7caa563d8b29830
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue98474-need-live-out-undef-subregister-def.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s
+
+; Check for verifier error after tail duplication. An implicit_def of
+; a subregister is needed to maintain liveness after assignment.
+
+define amdgpu_vs void @test(i32 inreg %cmp, i32 %e0) {
+; CHECK-LABEL: test:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_cmp_eq_u32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_cbranch_scc1 .LBB0_2
+; CHECK-NEXT:  ; %bb.1: ; %load
+; CHECK-NEXT:    s_mov_b32 s1, s0
+; CHECK-NEXT:    s_mov_b32 s2, s0
+; CHECK-NEXT:    s_mov_b32 s3, s0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    buffer_load_format_xy v[1:2], v1, s[0:3], 0 idxen
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    exp mrt0 v0, v1, v2, v0
+; CHECK-NEXT:    s_endpgm
+; CHECK-NEXT:  .LBB0_2:
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    exp mrt0 v0, v1, v2, v0
+; CHECK-NEXT:    s_endpgm
+entry:
+  %cond = icmp eq i32 %cmp, 0
+  br i1 %cond, label %end, label %load
+
+load:
+  %data1 = call <2 x i32> @llvm.amdgcn.struct.buffer.load.format.v2i32(<4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
+  %e1 = extractelement <2 x i32> %data1, i32 0
+  %e2 = extractelement <2 x i32> %data1, i32 1
+  br label %end
+
+end:
+  %out1 = phi i32 [ 0, %entry ], [ %e1, %load ]
+  %out2 = phi i32 [ poison, %entry ], [ %e2, %load ]
+  call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 %e0, i32 %out1, i32 %out2, i32 %e0, i1 false, i1 false)
+  ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
new file mode 100644
index 000000000000000..86b6c5982b4cbd8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
@@ -0,0 +1,363 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -start-before=greedy,2 -stop-after=tailduplication -verify-machineinstrs -o - %s | FileCheck %s
+
+# The partial def of %0 introduces a live out undef def of %0.sub1
+# into bb.3. We need to maintain this liveness with an explicit def of
+# the physical subregister. Without this, a verifier error would
+# appear after tail duplication.
+
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1_vgpr2
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub0_sub2
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub0_sub2
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr2, implicit-def $vgpr0
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16, addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, renamable $vgpr2, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub1:vreg_96 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_96 = BUFFER_LOAD_FORMAT_XYZ_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, killed %0.sub2, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+# Test another use of the value before the block end.
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_undef_use_in_def_block
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_undef_use_in_def_block
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit-def $vgpr1
+  ; CHECK-NEXT:   S_NOP 0, implicit renamable $vgpr0_vgpr1
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr1, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    S_NOP 0, implicit %0
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub1, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+# The undef subregister is not live out, so no implicit def should be added for it.
+---
+name:            undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_no_phi_use
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+body:             |
+  ; CHECK-LABEL: name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_no_phi_use
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+  ; CHECK-NEXT:   S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr0, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0_vgpr1 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+  ; CHECK-NEXT:   EXP 0, killed renamable $vgpr0, renamable $vgpr0, undef renamable $vgpr0, undef renamable $vgpr0, 0, 0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0
+
+    S_CMP_EQ_U32 killed $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    S_NOP 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %0:vreg_64 = BUFFER_LOAD_FORMAT_XY_IDXEN killed %1, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+
+  bb.3:
+    EXP 0, killed %0.sub0, killed %0.sub0, undef %2:vgpr_32, undef %2:vgpr_32, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
+
+# In bb.2, %0 should be assigned to vgpr0_vgpr1. Make sure the value
+# copied from $vgpr0 into %3 isn't clobbered by the undef phi def for
+# %0.sub1.
+---
+name:            assigned_physreg_subregister_interference
+tracksRegLiveness: true
+frameInfo:
+  adjustsStack: true
+machineFunctionInfo:
+  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
+  frameOffsetReg:  '$sgpr33'
+  stackPtrOffsetReg: '$sgpr32'
+  wwmReservedRegs:
+    - '$vgpr63'
+body:             |
+  ; CHECK-LABEL: name: assigned_physreg_subregister_interference
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr34, 2, $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr35, 3, $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr36, 4, $vgpr40
+  ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr37, 5, $vgpr40
+  ; CHECK-NEXT:   renamable $sgpr34_sgpr35 = S_MOV_B64 $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK-NEXT:   liveins: $sgpr34_sgpr35, $vgpr0_vgpr1:0x000000000000000F
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   renamable $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr5 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+  ; CHECK-NEXT:   renamable $vcc = V_CMP_EQ_U64_e64 $sgpr4_sgpr5, killed $vgpr0_vgpr1, implicit $exec
+  ; CHECK-NEXT:   renamable $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 killed renamable $vcc, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   dead $sgpr30_sgpr31 = noconvergent SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+  ; CHECK-NEXT:   renamable $vgpr1 = COPY $vgpr0, implicit $exec
+  ; CHECK-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 123, implicit $exec
+  ; CHECK-NEXT:   $exec = S_XOR_B64 $exec, renamable $sgpr36_sgpr37, implicit-def dead $scc
+  ; CHECK-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   liveins: $vgpr1, $sgpr34_sgpr35
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $exec = COPY renamable $sgpr34_sgpr35
+  ; CHECK-NEXT:   renamable $vgpr0 = V_ADD_U32_e32 1, killed $vgpr1, implicit $exec
+  ; CHECK-NEXT:   $sgpr37 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 5
+  ; CHECK-NEXT:   $sgpr36 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 4
+  ; CHECK-NEXT:   $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 3
+  ; CHECK-NEXT:   $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 2
+  ; CHECK-NEXT:   $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 1
+  ; CHECK-NEXT:   $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; CHECK-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
+  bb.0:
+    liveins: $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $vgpr0, $vgpr1, $vgpr63
+
+    $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr30, 0, $vgpr63
+    $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr63
+    $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr34, 2, $vgpr63
+    $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr35, 3, $vgpr63
+    $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr36, 4, $vgpr63
+    $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr37, 5, $vgpr63
+    undef %0.sub0:vreg_64 = COPY $vgpr0
+    %0.sub1:vreg_64 = COPY $vgpr1
+    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    renamable $sgpr34_sgpr35 = S_MOV_B64 $exec
+
+  bb.1:
+    liveins: $vgpr63, $sgpr34_sgpr35
+
+    renamable $sgpr4 = V_READFIRSTLANE_B32 %0.sub0, implicit $exec
+    renamable $sgpr5 = V_READFIRSTLANE_B32 %0.sub1, implicit $exec
+    renamable $vcc = V_CMP_EQ_U64_e64 $sgpr4_sgpr5, %0, implicit $exec
+    renamable $sgpr36_sgpr37 = S_AND_SAVEEXEC_B64 killed renamable $vcc, implicit-def $exec, implicit-def dead $scc, implicit $exec
+
+  bb.2:
+    liveins: $vgpr63, $sgpr4_sgpr5:0x000000000000000F, $sgpr34_sgpr35, $sgpr36_sgpr37
+
+    dead $sgpr30_sgpr31 = noconvergent SI_CALL killed renamable $sgpr4_sgpr5, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0
+    %3:vgpr_32 = COPY $vgpr0
+    undef %0.sub0:vreg_64 = V_MOV_B32_e32 123, implicit $exec
+    $exec = S_XOR_B64_term $exec, killed renamable $sgpr36_sgpr37, implicit-def dead $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+
+  bb.3:
+    liveins: $vgpr63, $sgpr34_sgpr35
+
+    $exec = S_MOV_B64_term killed renamable $sgpr34_sgpr35
+
+  bb.4:
+    liveins: $vgpr63
+
+    ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    %6:vgpr_32 = V_ADD_U32_e32 1, %3, implicit $exec
+    $vgpr0 = COPY %6
+    $sgpr37 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 5
+    $sgpr36 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 4
+    $sgpr35 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 3
+    $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 2
+    $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 1
+    $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0
+    SI_RETURN implicit $vgpr0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir b/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
index e63009fdcb43cf2..dd478f94e1039ec 100644
--- a/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
+++ b/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass machinelicm -o - %s | FileCheck -check-prefix=GCN %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes machinelicm -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass early-machinelicm -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes early-machinelicm -o - %s | FileCheck -check-prefix=GCN %s
 
 # MachineLICM shall limit hoisting of V_CVT instructions out of the loop keeping
 # register pressure within the budget. VGPR budget at occupancy 10 is 24 vgprs.
diff --git a/llvm/test/CodeGen/AMDGPU/licm-valu.mir b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
index b4f5e057f532b51..6a28eee19d503cf 100644
--- a/llvm/test/CodeGen/AMDGPU/licm-valu.mir
+++ b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machinelicm -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=machinelicm -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=early-machinelicm -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=early-machinelicm -o - %s | FileCheck -check-prefix=GCN %s
 
 ---
 name: hoist_move
diff --git a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
index a95f22507eece3c..ffe9e06c04ae453 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
@@ -46,10 +46,10 @@ define amdgpu_kernel void @max_size_small_static_memcpy_caller0(ptr addrspace(1)
 ; ALL:       load-store-loop:
 ; ALL-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; ALL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 1
+; ALL-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 1
 ; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1
-; ALL-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; ALL-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1
+; ALL-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; ALL-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024
 ; ALL-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; ALL:       memcpy-split:
@@ -66,10 +66,10 @@ define amdgpu_kernel void @min_size_large_static_memcpy_caller0(ptr addrspace(1)
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 1
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 1
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1
-; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1
+; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -93,20 +93,20 @@ define amdgpu_kernel void @max_size_small_static_memmove_caller0(ptr addrspace(1
 ; ALL-NEXT:    br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]]
 ; ALL:       memmove_bwd_loop:
 ; ALL-NEXT:    [[TMP1:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 1024, [[TMP0:%.*]] ]
-; ALL-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP1]], 16
+; ALL-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP1]], 256
 ; ALL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP2]], align 1
+; ALL-NEXT:    [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP2]], align 1
 ; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1
+; ALL-NEXT:    store <64 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1
 ; ALL-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[BWD_INDEX]], 0
 ; ALL-NEXT:    br i1 [[TMP4]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]]
 ; ALL:       memmove_fwd_loop:
 ; ALL-NEXT:    [[FWD_INDEX:%.*]] = phi i64 [ [[TMP7:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ]
 ; ALL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
+; ALL-NEXT:    [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP5]], align 1
 ; ALL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP6]], align 1
-; ALL-NEXT:    [[TMP7]] = add i64 [[FWD_INDEX]], 16
+; ALL-NEXT:    store <64 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP6]], align 1
+; ALL-NEXT:    [[TMP7]] = add i64 [[FWD_INDEX]], 256
 ; ALL-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 1024
 ; ALL-NEXT:    br i1 [[TMP8]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]]
 ; ALL:       memmove_done:
@@ -128,20 +128,20 @@ define amdgpu_kernel void @min_size_large_static_memmove_caller0(ptr addrspace(1
 ; OPT-NEXT:    br label [[MEMMOVE_BWD_LOOP:%.*]]
 ; OPT:       memmove_bwd_loop:
 ; OPT-NEXT:    [[TMP4:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 1024, [[MEMMOVE_BWD_RESIDUAL]] ]
-; OPT-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP4]], 16
+; OPT-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP4]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_INDEX]]
-; OPT-NEXT:    [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
+; OPT-NEXT:    [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP5]], align 1
 ; OPT-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP6]], align 1
+; OPT-NEXT:    store <64 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP6]], align 1
 ; OPT-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[BWD_INDEX]], 0
 ; OPT-NEXT:    br i1 [[TMP7]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]]
 ; OPT:       memmove_fwd_loop:
 ; OPT-NEXT:    [[FWD_INDEX:%.*]] = phi i64 [ [[TMP10:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0:%.*]] ]
 ; OPT-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_INDEX]]
-; OPT-NEXT:    [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP8]], align 1
+; OPT-NEXT:    [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP8]], align 1
 ; OPT-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP9]], align 1
-; OPT-NEXT:    [[TMP10]] = add i64 [[FWD_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP9]], align 1
+; OPT-NEXT:    [[TMP10]] = add i64 [[FWD_INDEX]], 256
 ; OPT-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[TMP10]], 1024
 ; OPT-NEXT:    br i1 [[TMP11]], label [[MEMMOVE_FWD_RESIDUAL:%.*]], label [[MEMMOVE_FWD_LOOP]]
 ; OPT:       memmove_fwd_residual:
@@ -421,17 +421,30 @@ define amdgpu_kernel void @memcpy_multi_use_one_function_keep_small(ptr addrspac
 ; ALL-NEXT:    [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]]
 ; ALL-NEXT:    br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
 ; ALL:       post-loop-memcpy-expansion:
-; ALL-NEXT:    br label [[LOAD_STORE_LOOP:%.*]]
-; ALL:       load-store-loop:
-; ALL-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION]] ], [ [[TMP19:%.*]], [[LOAD_STORE_LOOP]] ]
-; ALL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[LOOP_INDEX]]
+; ALL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 0
 ; ALL-NEXT:    [[TMP17:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP16]], align 1
-; ALL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1:%.*]], i64 [[LOOP_INDEX]]
+; ALL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1:%.*]], i64 0
 ; ALL-NEXT:    store <4 x i32> [[TMP17]], ptr addrspace(1) [[TMP18]], align 1
-; ALL-NEXT:    [[TMP19]] = add i64 [[LOOP_INDEX]], 16
-; ALL-NEXT:    [[TMP20:%.*]] = icmp ult i64 [[TMP19]], 96
-; ALL-NEXT:    br i1 [[TMP20]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
-; ALL:       memcpy-split:
+; ALL-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 16
+; ALL-NEXT:    [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP33]], align 1
+; ALL-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 16
+; ALL-NEXT:    store <4 x i32> [[TMP19]], ptr addrspace(1) [[TMP20]], align 1
+; ALL-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 32
+; ALL-NEXT:    [[TMP35:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP34]], align 1
+; ALL-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 32
+; ALL-NEXT:    store <4 x i32> [[TMP35]], ptr addrspace(1) [[TMP36]], align 1
+; ALL-NEXT:    [[TMP37:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 48
+; ALL-NEXT:    [[TMP38:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP37]], align 1
+; ALL-NEXT:    [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 48
+; ALL-NEXT:    store <4 x i32> [[TMP38]], ptr addrspace(1) [[TMP39]], align 1
+; ALL-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 64
+; ALL-NEXT:    [[TMP28:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP40]], align 1
+; ALL-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 64
+; ALL-NEXT:    store <4 x i32> [[TMP28]], ptr addrspace(1) [[TMP29]], align 1
+; ALL-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 80
+; ALL-NEXT:    [[TMP31:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP30]], align 1
+; ALL-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 80
+; ALL-NEXT:    store <4 x i32> [[TMP31]], ptr addrspace(1) [[TMP32]], align 1
 ; ALL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 96
 ; ALL-NEXT:    [[TMP22:%.*]] = load i32, ptr addrspace(1) [[TMP21]], align 1
 ; ALL-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST1]], i64 96
@@ -456,10 +469,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1028(ptr addrspace
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -479,10 +492,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1025(ptr addrspace
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -502,10 +515,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1026(ptr addrspace
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -525,10 +538,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1032(ptr addrspace
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -548,10 +561,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1034(ptr addrspace
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -575,10 +588,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1035(ptr addrspace
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -606,10 +619,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1036(ptr addrspace
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -633,10 +646,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1039(ptr addrspace
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -691,10 +704,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_1027(ptr addrspace
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 4
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
+; OPT-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -764,10 +777,10 @@ define amdgpu_kernel void @memcpy_private_align4_private_align4_1027(ptr addrspa
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 4
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP1]], align 4
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 4
-; OPT-NEXT:    [[TMP4]] = add i32 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 4
+; OPT-NEXT:    [[TMP4]] = add i32 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -814,10 +827,10 @@ define amdgpu_kernel void @memcpy_private_align1_private_align4_1027(ptr addrspa
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 4
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP1]], align 4
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 1
-; OPT-NEXT:    [[TMP4]] = add i32 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 1
+; OPT-NEXT:    [[TMP4]] = add i32 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -864,10 +877,10 @@ define amdgpu_kernel void @memcpy_private_align4_private_align1_1027(ptr addrspa
 ; OPT:       load-store-loop:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; OPT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 1
+; OPT-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP1]], align 1
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST:%.*]], i32 [[LOOP_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 4
-; OPT-NEXT:    [[TMP4]] = add i32 [[LOOP_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 4
+; OPT-NEXT:    [[TMP4]] = add i32 [[LOOP_INDEX]], 256
 ; OPT-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 1024
 ; OPT-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; OPT:       memcpy-split:
@@ -1194,17 +1207,10 @@ define amdgpu_kernel void @memcpy_global_align4_global_align4_16(ptr addrspace(1
 ; MAX1024-NEXT:    ret void
 ;
 ; ALL-LABEL: @memcpy_global_align4_global_align4_16(
-; ALL-NEXT:    br label [[LOAD_STORE_LOOP:%.*]]
-; ALL:       load-store-loop:
-; ALL-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
-; ALL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
+; ALL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 0
 ; ALL-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 4
-; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
+; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 0
 ; ALL-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 4
-; ALL-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
-; ALL-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 16
-; ALL-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
-; ALL:       memcpy-split:
 ; ALL-NEXT:    ret void
 ;
   call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 %dst, ptr addrspace(1) align 4 %src, i64 16, i1 false)
@@ -1326,20 +1332,20 @@ define amdgpu_kernel void @memmove_flat_align1_global_align1(ptr %dst, ptr addrs
 ; ALL-NEXT:    br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]]
 ; ALL:       memmove_bwd_loop:
 ; ALL-NEXT:    [[TMP2:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 256, [[TMP0:%.*]] ]
-; ALL-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP2]], 16
+; ALL-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP2]], 256
 ; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP3]], align 1
+; ALL-NEXT:    [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP3]], align 1
 ; ALL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[BWD_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[ELEMENT]], ptr [[TMP4]], align 1
+; ALL-NEXT:    store <64 x i32> [[ELEMENT]], ptr [[TMP4]], align 1
 ; ALL-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[BWD_INDEX]], 0
 ; ALL-NEXT:    br i1 [[TMP5]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]]
 ; ALL:       memmove_fwd_loop:
 ; ALL-NEXT:    [[FWD_INDEX:%.*]] = phi i64 [ [[TMP8:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ]
 ; ALL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP6]], align 1
+; ALL-NEXT:    [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP6]], align 1
 ; ALL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[FWD_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[ELEMENT1]], ptr [[TMP7]], align 1
-; ALL-NEXT:    [[TMP8]] = add i64 [[FWD_INDEX]], 16
+; ALL-NEXT:    store <64 x i32> [[ELEMENT1]], ptr [[TMP7]], align 1
+; ALL-NEXT:    [[TMP8]] = add i64 [[FWD_INDEX]], 256
 ; ALL-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 256
 ; ALL-NEXT:    br i1 [[TMP9]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]]
 ; ALL:       memmove_done:
@@ -1360,20 +1366,20 @@ define amdgpu_kernel void @memmove_global_align1_flat_align1(ptr addrspace(1) %d
 ; ALL-NEXT:    br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]]
 ; ALL:       memmove_bwd_loop:
 ; ALL-NEXT:    [[TMP2:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 256, [[TMP0:%.*]] ]
-; ALL-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP2]], 16
+; ALL-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP2]], 256
 ; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[BWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT:%.*]] = load <4 x i32>, ptr [[TMP3]], align 1
+; ALL-NEXT:    [[ELEMENT:%.*]] = load <64 x i32>, ptr [[TMP3]], align 1
 ; ALL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP4]], align 1
+; ALL-NEXT:    store <64 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP4]], align 1
 ; ALL-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[BWD_INDEX]], 0
 ; ALL-NEXT:    br i1 [[TMP5]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]]
 ; ALL:       memmove_fwd_loop:
 ; ALL-NEXT:    [[FWD_INDEX:%.*]] = phi i64 [ [[TMP8:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ]
 ; ALL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[FWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT1:%.*]] = load <4 x i32>, ptr [[TMP6]], align 1
+; ALL-NEXT:    [[ELEMENT1:%.*]] = load <64 x i32>, ptr [[TMP6]], align 1
 ; ALL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP7]], align 1
-; ALL-NEXT:    [[TMP8]] = add i64 [[FWD_INDEX]], 16
+; ALL-NEXT:    store <64 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP7]], align 1
+; ALL-NEXT:    [[TMP8]] = add i64 [[FWD_INDEX]], 256
 ; ALL-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 256
 ; ALL-NEXT:    br i1 [[TMP9]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]]
 ; ALL:       memmove_done:
@@ -1394,20 +1400,20 @@ define amdgpu_kernel void @memmove_flat_align1_private_align1(ptr %dst, ptr addr
 ; ALL-NEXT:    br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]]
 ; ALL:       memmove_bwd_loop:
 ; ALL-NEXT:    [[TMP2:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 256, [[TMP0:%.*]] ]
-; ALL-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP2]], 16
+; ALL-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP2]], 256
 ; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i64 [[BWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP3]], align 1
+; ALL-NEXT:    [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP3]], align 1
 ; ALL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[BWD_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[ELEMENT]], ptr [[TMP4]], align 1
+; ALL-NEXT:    store <64 x i32> [[ELEMENT]], ptr [[TMP4]], align 1
 ; ALL-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[BWD_INDEX]], 0
 ; ALL-NEXT:    br i1 [[TMP5]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]]
 ; ALL:       memmove_fwd_loop:
 ; ALL-NEXT:    [[FWD_INDEX:%.*]] = phi i64 [ [[TMP8:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ]
 ; ALL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i64 [[FWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP6]], align 1
+; ALL-NEXT:    [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP6]], align 1
 ; ALL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[FWD_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[ELEMENT1]], ptr [[TMP7]], align 1
-; ALL-NEXT:    [[TMP8]] = add i64 [[FWD_INDEX]], 16
+; ALL-NEXT:    store <64 x i32> [[ELEMENT1]], ptr [[TMP7]], align 1
+; ALL-NEXT:    [[TMP8]] = add i64 [[FWD_INDEX]], 256
 ; ALL-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 256
 ; ALL-NEXT:    br i1 [[TMP9]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]]
 ; ALL:       memmove_done:
@@ -1428,20 +1434,20 @@ define amdgpu_kernel void @memmove_private_align1_flat_align1(ptr addrspace(5) %
 ; ALL-NEXT:    br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]]
 ; ALL:       memmove_bwd_loop:
 ; ALL-NEXT:    [[TMP2:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 256, [[TMP0:%.*]] ]
-; ALL-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP2]], 16
+; ALL-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP2]], 256
 ; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[BWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT:%.*]] = load <4 x i32>, ptr [[TMP3]], align 1
+; ALL-NEXT:    [[ELEMENT:%.*]] = load <64 x i32>, ptr [[TMP3]], align 1
 ; ALL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i64 [[BWD_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[ELEMENT]], ptr addrspace(5) [[TMP4]], align 1
+; ALL-NEXT:    store <64 x i32> [[ELEMENT]], ptr addrspace(5) [[TMP4]], align 1
 ; ALL-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[BWD_INDEX]], 0
 ; ALL-NEXT:    br i1 [[TMP5]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]]
 ; ALL:       memmove_fwd_loop:
 ; ALL-NEXT:    [[FWD_INDEX:%.*]] = phi i64 [ [[TMP8:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ]
 ; ALL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[FWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT1:%.*]] = load <4 x i32>, ptr [[TMP6]], align 1
+; ALL-NEXT:    [[ELEMENT1:%.*]] = load <64 x i32>, ptr [[TMP6]], align 1
 ; ALL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i64 [[FWD_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[ELEMENT1]], ptr addrspace(5) [[TMP7]], align 1
-; ALL-NEXT:    [[TMP8]] = add i64 [[FWD_INDEX]], 16
+; ALL-NEXT:    store <64 x i32> [[ELEMENT1]], ptr addrspace(5) [[TMP7]], align 1
+; ALL-NEXT:    [[TMP8]] = add i64 [[FWD_INDEX]], 256
 ; ALL-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[TMP8]], 256
 ; ALL-NEXT:    br i1 [[TMP9]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]]
 ; ALL:       memmove_done:
@@ -1461,10 +1467,10 @@ define amdgpu_kernel void @memmove_private_align1_global_align1(ptr addrspace(5)
 ; ALL:       load-store-loop:
 ; ALL-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; ALL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP1]], align 1, !alias.scope [[META0:![0-9]+]]
+; ALL-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP1]], align 1, !alias.scope [[META0:![0-9]+]]
 ; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 1, !noalias [[META0]]
-; ALL-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; ALL-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(5) [[TMP3]], align 1, !noalias [[META0]]
+; ALL-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; ALL-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 256
 ; ALL-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; ALL:       memcpy-split:
@@ -1484,10 +1490,10 @@ define amdgpu_kernel void @memmove_global_align1_private_align1(ptr addrspace(1)
 ; ALL:       load-store-loop:
 ; ALL-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; ALL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP1]], align 1, !alias.scope [[META3:![0-9]+]]
+; ALL-NEXT:    [[TMP2:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP1]], align 1, !alias.scope [[META3:![0-9]+]]
 ; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1, !noalias [[META3]]
-; ALL-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
+; ALL-NEXT:    store <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1, !noalias [[META3]]
+; ALL-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
 ; ALL-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 256
 ; ALL-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; ALL:       memcpy-split:
@@ -2144,20 +2150,20 @@ define amdgpu_kernel void @memmove_private_align1_private_align1(ptr addrspace(5
 ; ALL-NEXT:    br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]]
 ; ALL:       memmove_bwd_loop:
 ; ALL-NEXT:    [[TMP1:%.*]] = phi i32 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 256, [[TMP0:%.*]] ]
-; ALL-NEXT:    [[BWD_INDEX]] = sub i32 [[TMP1]], 16
+; ALL-NEXT:    [[BWD_INDEX]] = sub i32 [[TMP1]], 256
 ; ALL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 [[BWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP2]], align 1
+; ALL-NEXT:    [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP2]], align 1
 ; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 [[BWD_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[ELEMENT]], ptr addrspace(5) [[TMP3]], align 1
+; ALL-NEXT:    store <64 x i32> [[ELEMENT]], ptr addrspace(5) [[TMP3]], align 1
 ; ALL-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[BWD_INDEX]], 0
 ; ALL-NEXT:    br i1 [[TMP4]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]]
 ; ALL:       memmove_fwd_loop:
 ; ALL-NEXT:    [[FWD_INDEX:%.*]] = phi i32 [ [[TMP7:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ]
 ; ALL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[SRC]], i32 [[FWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(5) [[TMP5]], align 1
+; ALL-NEXT:    [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(5) [[TMP5]], align 1
 ; ALL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[DST]], i32 [[FWD_INDEX]]
-; ALL-NEXT:    store <4 x i32> [[ELEMENT1]], ptr addrspace(5) [[TMP6]], align 1
-; ALL-NEXT:    [[TMP7]] = add i32 [[FWD_INDEX]], 16
+; ALL-NEXT:    store <64 x i32> [[ELEMENT1]], ptr addrspace(5) [[TMP6]], align 1
+; ALL-NEXT:    [[TMP7]] = add i32 [[FWD_INDEX]], 256
 ; ALL-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 256
 ; ALL-NEXT:    br i1 [[TMP8]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]]
 ; ALL:       memmove_done:
@@ -2231,27 +2237,27 @@ define amdgpu_kernel void @memmove_global_align4_static_residual_empty(ptr addrs
 ; OPT-NEXT:    [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.*]], [[DST:%.*]]
 ; OPT-NEXT:    br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]]
 ; OPT:       memmove_bwd_loop:
-; OPT-NEXT:    [[TMP1:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 1040, [[TMP0:%.*]] ]
-; OPT-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP1]], 16
+; OPT-NEXT:    [[TMP11:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 1280, [[TMP0:%.*]] ]
+; OPT-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP11]], 256
 ; OPT-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_INDEX]]
-; OPT-NEXT:    [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP2]], align 1
+; OPT-NEXT:    [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP2]], align 1
 ; OPT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1
+; OPT-NEXT:    store <64 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1
 ; OPT-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[BWD_INDEX]], 0
 ; OPT-NEXT:    br i1 [[TMP4]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]]
 ; OPT:       memmove_fwd_loop:
 ; OPT-NEXT:    [[FWD_INDEX:%.*]] = phi i64 [ [[TMP7:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ]
 ; OPT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_INDEX]]
-; OPT-NEXT:    [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
+; OPT-NEXT:    [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP5]], align 1
 ; OPT-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP6]], align 1
-; OPT-NEXT:    [[TMP7]] = add i64 [[FWD_INDEX]], 16
-; OPT-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 1040
+; OPT-NEXT:    store <64 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP6]], align 1
+; OPT-NEXT:    [[TMP7]] = add i64 [[FWD_INDEX]], 256
+; OPT-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 1280
 ; OPT-NEXT:    br i1 [[TMP8]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]]
 ; OPT:       memmove_done:
 ; OPT-NEXT:    ret void
 ;
-  call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1040, i1 false)
+  call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 1280, i1 false)
   ret void
 }
 
@@ -2279,20 +2285,20 @@ define amdgpu_kernel void @memmove_global_align4_static_residual_full(ptr addrsp
 ; OPT-NEXT:    br label [[MEMMOVE_BWD_LOOP:%.*]]
 ; OPT:       memmove_bwd_loop:
 ; OPT-NEXT:    [[TMP13:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 1024, [[MEMMOVE_BWD_RESIDUAL]] ]
-; OPT-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP13]], 16
+; OPT-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP13]], 256
 ; OPT-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_INDEX]]
-; OPT-NEXT:    [[ELEMENT:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP14]], align 1
+; OPT-NEXT:    [[ELEMENT:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP14]], align 1
 ; OPT-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP15]], align 1
+; OPT-NEXT:    store <64 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP15]], align 1
 ; OPT-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[BWD_INDEX]], 0
 ; OPT-NEXT:    br i1 [[TMP16]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]]
 ; OPT:       memmove_fwd_loop:
 ; OPT-NEXT:    [[FWD_INDEX:%.*]] = phi i64 [ [[TMP19:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0:%.*]] ]
 ; OPT-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_INDEX]]
-; OPT-NEXT:    [[ELEMENT1:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP17]], align 1
+; OPT-NEXT:    [[ELEMENT1:%.*]] = load <64 x i32>, ptr addrspace(1) [[TMP17]], align 1
 ; OPT-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_INDEX]]
-; OPT-NEXT:    store <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP18]], align 1
-; OPT-NEXT:    [[TMP19]] = add i64 [[FWD_INDEX]], 16
+; OPT-NEXT:    store <64 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP18]], align 1
+; OPT-NEXT:    [[TMP19]] = add i64 [[FWD_INDEX]], 256
 ; OPT-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[TMP19]], 1024
 ; OPT-NEXT:    br i1 [[TMP20]], label [[MEMMOVE_FWD_RESIDUAL:%.*]], label [[MEMMOVE_FWD_LOOP]]
 ; OPT:       memmove_fwd_residual:
@@ -2363,40 +2369,40 @@ entry:
 
 define amdgpu_kernel void @memmove_volatile(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
 ; MAX1024-LABEL: @memmove_volatile(
-; MAX1024-NEXT:    call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 64, i1 true)
+; MAX1024-NEXT:    call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 512, i1 true)
 ; MAX1024-NEXT:    ret void
 ;
 ; ALL-LABEL: @memmove_volatile(
 ; ALL-NEXT:    [[COMPARE_SRC_DST:%.*]] = icmp ult ptr addrspace(1) [[SRC:%.*]], [[DST:%.*]]
 ; ALL-NEXT:    br i1 [[COMPARE_SRC_DST]], label [[MEMMOVE_BWD_LOOP:%.*]], label [[MEMMOVE_FWD_LOOP:%.*]]
 ; ALL:       memmove_bwd_loop:
-; ALL-NEXT:    [[TMP1:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 64, [[TMP0:%.*]] ]
-; ALL-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP1]], 16
+; ALL-NEXT:    [[TMP1:%.*]] = phi i64 [ [[BWD_INDEX:%.*]], [[MEMMOVE_BWD_LOOP]] ], [ 512, [[TMP0:%.*]] ]
+; ALL-NEXT:    [[BWD_INDEX]] = sub i64 [[TMP1]], 256
 ; ALL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[BWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT:%.*]] = load volatile <4 x i32>, ptr addrspace(1) [[TMP2]], align 1
+; ALL-NEXT:    [[ELEMENT:%.*]] = load volatile <64 x i32>, ptr addrspace(1) [[TMP2]], align 1
 ; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[BWD_INDEX]]
-; ALL-NEXT:    store volatile <4 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1
+; ALL-NEXT:    store volatile <64 x i32> [[ELEMENT]], ptr addrspace(1) [[TMP3]], align 1
 ; ALL-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[BWD_INDEX]], 0
 ; ALL-NEXT:    br i1 [[TMP4]], label [[MEMMOVE_DONE:%.*]], label [[MEMMOVE_BWD_LOOP]]
 ; ALL:       memmove_fwd_loop:
 ; ALL-NEXT:    [[FWD_INDEX:%.*]] = phi i64 [ [[TMP7:%.*]], [[MEMMOVE_FWD_LOOP]] ], [ 0, [[TMP0]] ]
 ; ALL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[FWD_INDEX]]
-; ALL-NEXT:    [[ELEMENT1:%.*]] = load volatile <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
+; ALL-NEXT:    [[ELEMENT1:%.*]] = load volatile <64 x i32>, ptr addrspace(1) [[TMP5]], align 1
 ; ALL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST]], i64 [[FWD_INDEX]]
-; ALL-NEXT:    store volatile <4 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP6]], align 1
-; ALL-NEXT:    [[TMP7]] = add i64 [[FWD_INDEX]], 16
-; ALL-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 64
+; ALL-NEXT:    store volatile <64 x i32> [[ELEMENT1]], ptr addrspace(1) [[TMP6]], align 1
+; ALL-NEXT:    [[TMP7]] = add i64 [[FWD_INDEX]], 256
+; ALL-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 512
 ; ALL-NEXT:    br i1 [[TMP8]], label [[MEMMOVE_DONE]], label [[MEMMOVE_FWD_LOOP]]
 ; ALL:       memmove_done:
 ; ALL-NEXT:    ret void
 ;
-  call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 64, i1 true)
+  call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 512, i1 true)
   ret void
 }
 
 define amdgpu_kernel void @memcpy_volatile(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
 ; MAX1024-LABEL: @memcpy_volatile(
-; MAX1024-NEXT:    call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 64, i1 true)
+; MAX1024-NEXT:    call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) [[DST:%.*]], ptr addrspace(1) [[SRC:%.*]], i64 512, i1 true)
 ; MAX1024-NEXT:    ret void
 ;
 ; ALL-LABEL: @memcpy_volatile(
@@ -2404,16 +2410,16 @@ define amdgpu_kernel void @memcpy_volatile(ptr addrspace(1) %dst, ptr addrspace(
 ; ALL:       load-store-loop:
 ; ALL-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[LOAD_STORE_LOOP]] ]
 ; ALL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT:    [[TMP2:%.*]] = load volatile <4 x i32>, ptr addrspace(1) [[TMP1]], align 1
+; ALL-NEXT:    [[TMP2:%.*]] = load volatile <64 x i32>, ptr addrspace(1) [[TMP1]], align 1
 ; ALL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[DST:%.*]], i64 [[LOOP_INDEX]]
-; ALL-NEXT:    store volatile <4 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1
-; ALL-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 16
-; ALL-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 64
+; ALL-NEXT:    store volatile <64 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 1
+; ALL-NEXT:    [[TMP4]] = add i64 [[LOOP_INDEX]], 256
+; ALL-NEXT:    [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 512
 ; ALL-NEXT:    br i1 [[TMP5]], label [[LOAD_STORE_LOOP]], label [[MEMCPY_SPLIT:%.*]]
 ; ALL:       memcpy-split:
 ; ALL-NEXT:    ret void
 ;
-  call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 64, i1 true)
+  call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst, ptr addrspace(1) %src, i64 512, i1 true)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
new file mode 100644
index 000000000000000..565fce0e7abdeae
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
@@ -0,0 +1,16049 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 %s -o - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -mattr=-unaligned-access-mode %s -o - | FileCheck -check-prefix=ALIGNED %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -amdgpu-memcpy-loop-unroll=3 %s -o - | FileCheck -check-prefix=UNROLL3 %s
+
+; For checking that LowerMemIntrinsics lowers memcpy and memmove with large
+; constant copy-sizes into loops with multiple load/store pairs.
+
+
+; memcpy for address spaces 0, 1, 4, 5
+
+define void @memcpy_p0_p0_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
+; CHECK-LABEL: memcpy_p0_p0_sz2048:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:  .LBB0_1: ; %load-store-loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    v_add_co_u32 v96, vcc_lo, v2, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo
+; CHECK-NEXT:    v_add_co_u32 v100, vcc_lo, v0, s4
+; CHECK-NEXT:    s_add_u32 s4, s4, 0x100
+; CHECK-NEXT:    s_clause 0xf
+; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[96:97] offset:224
+; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[96:97] offset:240
+; CHECK-NEXT:    flat_load_dwordx4 v[12:15], v[96:97] offset:192
+; CHECK-NEXT:    flat_load_dwordx4 v[16:19], v[96:97] offset:208
+; CHECK-NEXT:    flat_load_dwordx4 v[20:23], v[96:97] offset:160
+; CHECK-NEXT:    flat_load_dwordx4 v[24:27], v[96:97] offset:176
+; CHECK-NEXT:    flat_load_dwordx4 v[28:31], v[96:97] offset:128
+; CHECK-NEXT:    flat_load_dwordx4 v[32:35], v[96:97] offset:144
+; CHECK-NEXT:    flat_load_dwordx4 v[36:39], v[96:97] offset:96
+; CHECK-NEXT:    flat_load_dwordx4 v[48:51], v[96:97] offset:112
+; CHECK-NEXT:    flat_load_dwordx4 v[52:55], v[96:97] offset:64
+; CHECK-NEXT:    flat_load_dwordx4 v[64:67], v[96:97] offset:80
+; CHECK-NEXT:    flat_load_dwordx4 v[68:71], v[96:97] offset:32
+; CHECK-NEXT:    flat_load_dwordx4 v[80:83], v[96:97] offset:48
+; CHECK-NEXT:    flat_load_dwordx4 v[84:87], v[96:97]
+; CHECK-NEXT:    flat_load_dwordx4 v[96:99], v[96:97] offset:16
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo
+; CHECK-NEXT:    s_addc_u32 s5, s5, 0
+; CHECK-NEXT:    s_waitcnt vmcnt(15) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[4:7] offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(14) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[8:11] offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(13) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[12:15] offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(12) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[16:19] offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(11) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[20:23] offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(10) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[24:27] offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(9) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[28:31] offset:128
+; CHECK-NEXT:    s_waitcnt vmcnt(8) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[32:35] offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(7) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[36:39] offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(6) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[48:51] offset:112
+; CHECK-NEXT:    s_waitcnt vmcnt(5) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[52:55] offset:64
+; CHECK-NEXT:    s_waitcnt vmcnt(4) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[64:67] offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[68:71] offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[80:83] offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[84:87]
+; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[96:99] offset:16
+; CHECK-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; CHECK-NEXT:    s_cbranch_vccnz .LBB0_1
+; CHECK-NEXT:  ; %bb.2: ; %memcpy-split
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+;
+; ALIGNED-LABEL: memcpy_p0_p0_sz2048:
+; ALIGNED:       ; %bb.0: ; %entry
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill
+; ALIGNED-NEXT:  .LBB0_1: ; %load-store-loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    v_add_co_u32 v24, vcc_lo, v2, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v25, vcc_lo, s5, v3, vcc_lo
+; ALIGNED-NEXT:    s_clause 0xf
+; ALIGNED-NEXT:    flat_load_dwordx4 v[16:19], v[24:25] offset:240
+; ALIGNED-NEXT:    flat_load_dwordx4 v[20:23], v[24:25] offset:224
+; ALIGNED-NEXT:    flat_load_dwordx4 v[4:7], v[24:25]
+; ALIGNED-NEXT:    flat_load_dwordx4 v[8:11], v[24:25] offset:16
+; ALIGNED-NEXT:    flat_load_dwordx4 v[12:15], v[24:25] offset:32
+; ALIGNED-NEXT:    flat_load_dwordx4 v[112:115], v[24:25] offset:48
+; ALIGNED-NEXT:    flat_load_dwordx4 v[116:119], v[24:25] offset:64
+; ALIGNED-NEXT:    flat_load_dwordx4 v[40:43], v[24:25] offset:80
+; ALIGNED-NEXT:    flat_load_dwordx4 v[26:29], v[24:25] offset:96
+; ALIGNED-NEXT:    flat_load_dwordx4 v[32:35], v[24:25] offset:112
+; ALIGNED-NEXT:    flat_load_dwordx4 v[44:47], v[24:25] offset:128
+; ALIGNED-NEXT:    flat_load_dwordx4 v[52:55], v[24:25] offset:144
+; ALIGNED-NEXT:    flat_load_dwordx4 v[66:69], v[24:25] offset:160
+; ALIGNED-NEXT:    flat_load_dwordx4 v[81:84], v[24:25] offset:176
+; ALIGNED-NEXT:    flat_load_dwordx4 v[96:99], v[24:25] offset:192
+; ALIGNED-NEXT:    flat_load_dwordx4 v[100:103], v[24:25] offset:208
+; ALIGNED-NEXT:    s_waitcnt vmcnt(15) lgkmcnt(15)
+; ALIGNED-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0x100
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, 0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v31 offset:254
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:252
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v30 offset:250
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:248
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v25 offset:246
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v25 offset:244
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v24 offset:242
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v24 offset:240
+; ALIGNED-NEXT:    s_waitcnt lgkmcnt(22)
+; ALIGNED-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    buffer_store_dword v21, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_store_dword v22, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_store_dword v23, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v51 offset:238
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v51 offset:236
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v50 offset:234
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v50 offset:232
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v49 offset:230
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v49 offset:228
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v36 offset:226
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v36 offset:224
+; ALIGNED-NEXT:    s_waitcnt lgkmcnt(16)
+; ALIGNED-NEXT:    buffer_store_dword v100, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    buffer_store_dword v101, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_store_dword v102, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_store_dword v103, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v71, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    buffer_load_dword v70, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_load_dword v65, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_load_dword v64, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v31
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 8, v31
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v30
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 8, v30
+; ALIGNED-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v71 offset:222
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v71 offset:220
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v70 offset:218
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v70 offset:216
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v65 offset:214
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v65 offset:212
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v64 offset:210
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v64 offset:208
+; ALIGNED-NEXT:    buffer_store_dword v96, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    buffer_store_dword v97, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_store_dword v98, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_store_dword v99, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v87, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    buffer_load_dword v86, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_load_dword v85, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_load_dword v80, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v87 offset:206
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v87 offset:204
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v86 offset:202
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v86 offset:200
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v85 offset:198
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v85 offset:196
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v80 offset:194
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v80 offset:192
+; ALIGNED-NEXT:    buffer_store_dword v81, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    buffer_store_dword v82, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_store_dword v83, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_store_dword v84, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v101, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    buffer_load_dword v99, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_load_dword v96, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_load_dword v81, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v101 offset:190
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v101 offset:188
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v99 offset:186
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v99 offset:184
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v96 offset:182
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v96 offset:180
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v81 offset:178
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v81 offset:176
+; ALIGNED-NEXT:    buffer_store_dword v66, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    buffer_store_dword v67, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_store_dword v68, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_store_dword v69, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v100, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    buffer_load_dword v97, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_load_dword v82, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_load_dword v66, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v100 offset:174
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v100 offset:172
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v97 offset:170
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v97 offset:168
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v82 offset:166
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v82 offset:164
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v66 offset:162
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v66 offset:160
+; ALIGNED-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_store_dword v54, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_store_dword v55, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v98, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    buffer_load_dword v83, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v98 offset:158
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v98 offset:156
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v83 offset:154
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v83 offset:152
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v67 offset:150
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v67 offset:148
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v52 offset:146
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v52 offset:144
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v84, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    buffer_load_dword v68, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v84 offset:142
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v84 offset:140
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v68 offset:138
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v68 offset:136
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v53 offset:134
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v53 offset:132
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v37 offset:130
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v37 offset:128
+; ALIGNED-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:192
+; ALIGNED-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:196
+; ALIGNED-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:200
+; ALIGNED-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:204
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v69, off, s[0:3], s32 offset:204
+; ALIGNED-NEXT:    buffer_load_dword v54, off, s[0:3], s32 offset:200
+; ALIGNED-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:196
+; ALIGNED-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:192
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v69 offset:126
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v69 offset:124
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v54 offset:122
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v54 offset:120
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v38 offset:118
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v38 offset:116
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v32 offset:114
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v32 offset:112
+; ALIGNED-NEXT:    buffer_store_dword v26, off, s[0:3], s32 offset:208
+; ALIGNED-NEXT:    buffer_store_dword v27, off, s[0:3], s32 offset:212
+; ALIGNED-NEXT:    buffer_store_dword v28, off, s[0:3], s32 offset:216
+; ALIGNED-NEXT:    buffer_store_dword v29, off, s[0:3], s32 offset:220
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v55, off, s[0:3], s32 offset:220
+; ALIGNED-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:216
+; ALIGNED-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:212
+; ALIGNED-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:208
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v55 offset:110
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v55 offset:108
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v39 offset:106
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v39 offset:104
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v33 offset:102
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v33 offset:100
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v26 offset:98
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v26 offset:96
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v48 offset:94
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v48 offset:92
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v34 offset:90
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v34 offset:88
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v27 offset:86
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v27 offset:84
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v21 offset:82
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v21 offset:80
+; ALIGNED-NEXT:    buffer_store_dword v116, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    buffer_store_dword v117, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_store_dword v118, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_store_dword v119, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v35 offset:78
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v35 offset:76
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v28 offset:74
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v28 offset:72
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v22 offset:70
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v22 offset:68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v19 offset:66
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v19 offset:64
+; ALIGNED-NEXT:    buffer_store_dword v112, off, s[0:3], s32 offset:256
+; ALIGNED-NEXT:    buffer_store_dword v113, off, s[0:3], s32 offset:260
+; ALIGNED-NEXT:    buffer_store_dword v114, off, s[0:3], s32 offset:264
+; ALIGNED-NEXT:    buffer_store_dword v115, off, s[0:3], s32 offset:268
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v29, off, s[0:3], s32 offset:268
+; ALIGNED-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:264
+; ALIGNED-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:260
+; ALIGNED-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:256
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 8, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 8, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v51
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 8, v51
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v50
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 8, v50
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v29 offset:62
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v29 offset:60
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v23 offset:58
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v23 offset:56
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v20 offset:54
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v20 offset:52
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v18 offset:50
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v18 offset:48
+; ALIGNED-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:272
+; ALIGNED-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:276
+; ALIGNED-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:280
+; ALIGNED-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:284
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:280
+; ALIGNED-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:284
+; ALIGNED-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:272
+; ALIGNED-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:276
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v15 offset:42
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v15 offset:40
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v14 offset:46
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v14 offset:44
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v13 offset:34
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v13 offset:32
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v12 offset:38
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v12 offset:36
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:224
+; ALIGNED-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:228
+; ALIGNED-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:232
+; ALIGNED-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:236
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:236
+; ALIGNED-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:232
+; ALIGNED-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:228
+; ALIGNED-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:224
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v11 offset:30
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v11 offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v10 offset:26
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v10 offset:24
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v9 offset:22
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v9 offset:20
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v8 offset:18
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v8 offset:16
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:240
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:244
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:248
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:252
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:252
+; ALIGNED-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:248
+; ALIGNED-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:244
+; ALIGNED-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:240
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v112 offset:247
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v65
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v102 offset:255
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v49
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 8, v49
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:253
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v36
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 8, v36
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v103 offset:251
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v71
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 8, v71
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:249
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v70
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 8, v70
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 8, v65
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v25 offset:245
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 24, v64
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 8, v64
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v112 offset:215
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v67
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 8, v67
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v113 offset:243
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v87
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v87
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v24 offset:241
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v86
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 8, v86
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v114 offset:239
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v85
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v85
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v51 offset:237
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 24, v80
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 8, v80
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v115 offset:235
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 8, v101
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v50 offset:233
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 24, v99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 8, v99
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v102 offset:231
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v96
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v96, 8, v96
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v49 offset:229
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 24, v81
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v81
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:227
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v100
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v100
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v36 offset:225
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v97
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v97, 8, v97
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v103 offset:223
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v82
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 8, v82
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v71 offset:221
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 24, v66
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 8, v66
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:219
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v98
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v98
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v70 offset:217
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v83
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v83
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v65 offset:213
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 24, v52
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 8, v52
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v25 offset:211
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 24, v84
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 8, v84
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v64 offset:209
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 24, v68
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v67 offset:149
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 24, v8
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v8, 8, v8
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 8, v68
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v113 offset:207
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v53
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 8, v53
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v87 offset:205
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 24, v37
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v37, 8, v37
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v24 offset:203
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v69
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 8, v69
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v86 offset:201
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v54
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v54, 8, v54
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v114 offset:199
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v38
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v38, 8, v38
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v85 offset:197
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 24, v32
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v32, 8, v32
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v51 offset:195
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 24, v55
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v55, 8, v55
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v80 offset:193
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v39
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v39, 8, v39
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v115 offset:191
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v33
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v33, 8, v33
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v101 offset:189
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v26
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v26, 8, v26
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v50 offset:187
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 24, v48
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 8, v48
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v99 offset:185
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v34
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v34, 8, v34
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v102 offset:183
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v27
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v27, 8, v27
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v96 offset:181
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v96, 24, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v21, 8, v21
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v49 offset:179
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 24, v35
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v35, 8, v35
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v81 offset:177
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 24, v28
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 8, v28
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:175
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v22
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v22, 8, v22
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v100 offset:173
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 24, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v19, 8, v19
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v36 offset:171
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v29
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 8, v29
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v97 offset:169
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v97, 24, v23
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v23, 8, v23
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v103 offset:167
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v20
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v20, 8, v20
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v82 offset:165
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v18, 8, v18
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v71 offset:163
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 24, v15
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v15, 8, v15
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v66 offset:161
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 24, v14
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v14, 8, v14
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:159
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v13
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v13, 8, v13
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v98 offset:157
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 24, v12
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v12, 8, v12
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v70 offset:155
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v11
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v11, 8, v11
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v83 offset:153
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 24, v10
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v10, 8, v10
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v112 offset:151
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v9
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v9, 8, v9
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v65 offset:147
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v52 offset:145
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v25 offset:143
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v84 offset:141
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v64 offset:139
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v68 offset:137
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v113 offset:135
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v53 offset:133
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v87 offset:131
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v37 offset:129
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v24 offset:127
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v69 offset:125
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v86 offset:123
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v54 offset:121
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v114 offset:119
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v38 offset:117
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v85 offset:115
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v32 offset:113
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v51 offset:111
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v55 offset:109
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v80 offset:107
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v39 offset:105
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v115 offset:103
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v33 offset:101
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v101 offset:99
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v26 offset:97
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v50 offset:95
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v48 offset:93
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v99 offset:91
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v34 offset:89
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v102 offset:87
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v27 offset:85
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v96 offset:83
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v21 offset:81
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v49 offset:79
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v35 offset:77
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v81 offset:75
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v28 offset:73
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:71
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v22 offset:69
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v100 offset:67
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v19 offset:65
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v36 offset:63
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v29 offset:61
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v97 offset:59
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v23 offset:57
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v103 offset:55
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v20 offset:53
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v82 offset:51
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v18 offset:49
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v71 offset:43
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v15 offset:41
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v66 offset:47
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v14 offset:45
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:35
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v13 offset:33
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v98 offset:39
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v12 offset:37
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v70 offset:31
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v11 offset:29
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v83 offset:27
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v10 offset:25
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v112 offset:23
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v9 offset:21
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v67 offset:19
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v8 offset:17
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v7 offset:14
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v7 offset:12
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v6 offset:10
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v6 offset:8
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v5 offset:6
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v5 offset:4
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v4 offset:2
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v4
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v8, 24, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v9, 24, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v6, 8, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v10, 24, v5
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v5, 8, v5
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v11, 24, v4
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v4, 8, v4
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v8 offset:15
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v7 offset:13
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v9 offset:11
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v6 offset:9
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v10 offset:7
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v5 offset:5
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v11 offset:3
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v4 offset:1
+; ALIGNED-NEXT:    s_cbranch_vccnz .LBB0_1
+; ALIGNED-NEXT:  ; %bb.2: ; %memcpy-split
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_dword v47, off, s[0:3], s32
+; ALIGNED-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:4
+; ALIGNED-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:8
+; ALIGNED-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:12
+; ALIGNED-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:16
+; ALIGNED-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:20
+; ALIGNED-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:24
+; ALIGNED-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    s_setpc_b64 s[30:31]
+;
+; UNROLL3-LABEL: memcpy_p0_p0_sz2048:
+; UNROLL3:       ; %bb.0: ; %entry
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0
+; UNROLL3-NEXT:    .p2align 6
+; UNROLL3-NEXT:  .LBB0_1: ; %load-store-loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    v_add_co_u32 v12, vcc_lo, v2, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo
+; UNROLL3-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 48
+; UNROLL3-NEXT:    s_clause 0x2
+; UNROLL3-NEXT:    flat_load_dwordx4 v[4:7], v[12:13]
+; UNROLL3-NEXT:    flat_load_dwordx4 v[8:11], v[12:13] offset:16
+; UNROLL3-NEXT:    flat_load_dwordx4 v[12:15], v[12:13] offset:32
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, 0
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[4:7]
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(2)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[8:11] offset:16
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(2)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[12:15] offset:32
+; UNROLL3-NEXT:    v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5]
+; UNROLL3-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; UNROLL3-NEXT:    s_cbranch_vccnz .LBB0_1
+; UNROLL3-NEXT:  ; %bb.2: ; %memcpy-split
+; UNROLL3-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:2016
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:2016
+; UNROLL3-NEXT:    flat_load_dwordx4 v[2:5], v[2:3] offset:2032
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[2:5] offset:2032
+; UNROLL3-NEXT:    s_waitcnt lgkmcnt(0)
+; UNROLL3-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 2048, i1 false)
+  ret void
+}
+
+define void @memcpy_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
+; CHECK-LABEL: memcpy_p1_p1_sz2048:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:  .LBB1_1: ; %load-store-loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    v_add_co_u32 v96, vcc_lo, v2, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo
+; CHECK-NEXT:    v_add_co_u32 v100, vcc_lo, v0, s4
+; CHECK-NEXT:    s_add_u32 s4, s4, 0x100
+; CHECK-NEXT:    s_clause 0xf
+; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[96:97], off offset:224
+; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[96:97], off offset:240
+; CHECK-NEXT:    global_load_dwordx4 v[12:15], v[96:97], off offset:192
+; CHECK-NEXT:    global_load_dwordx4 v[16:19], v[96:97], off offset:208
+; CHECK-NEXT:    global_load_dwordx4 v[20:23], v[96:97], off offset:160
+; CHECK-NEXT:    global_load_dwordx4 v[24:27], v[96:97], off offset:176
+; CHECK-NEXT:    global_load_dwordx4 v[28:31], v[96:97], off offset:128
+; CHECK-NEXT:    global_load_dwordx4 v[32:35], v[96:97], off offset:144
+; CHECK-NEXT:    global_load_dwordx4 v[36:39], v[96:97], off offset:96
+; CHECK-NEXT:    global_load_dwordx4 v[48:51], v[96:97], off offset:112
+; CHECK-NEXT:    global_load_dwordx4 v[52:55], v[96:97], off offset:64
+; CHECK-NEXT:    global_load_dwordx4 v[64:67], v[96:97], off offset:80
+; CHECK-NEXT:    global_load_dwordx4 v[68:71], v[96:97], off offset:32
+; CHECK-NEXT:    global_load_dwordx4 v[80:83], v[96:97], off offset:48
+; CHECK-NEXT:    global_load_dwordx4 v[84:87], v[96:97], off
+; CHECK-NEXT:    global_load_dwordx4 v[96:99], v[96:97], off offset:16
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo
+; CHECK-NEXT:    s_addc_u32 s5, s5, 0
+; CHECK-NEXT:    s_waitcnt vmcnt(15)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[4:7], off offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(14)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[8:11], off offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(13)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[12:15], off offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(12)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[16:19], off offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(11)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[20:23], off offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(10)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[24:27], off offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(9)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[28:31], off offset:128
+; CHECK-NEXT:    s_waitcnt vmcnt(8)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[32:35], off offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(7)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[36:39], off offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(6)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[48:51], off offset:112
+; CHECK-NEXT:    s_waitcnt vmcnt(5)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[52:55], off offset:64
+; CHECK-NEXT:    s_waitcnt vmcnt(4)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[64:67], off offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(3)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[68:71], off offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(2)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[80:83], off offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[84:87], off
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[96:99], off offset:16
+; CHECK-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; CHECK-NEXT:    s_cbranch_vccnz .LBB1_1
+; CHECK-NEXT:  ; %bb.2: ; %memcpy-split
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+;
+; ALIGNED-LABEL: memcpy_p1_p1_sz2048:
+; ALIGNED:       ; %bb.0: ; %entry
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill
+; ALIGNED-NEXT:  .LBB1_1: ; %load-store-loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    v_add_co_u32 v24, vcc_lo, v2, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v25, vcc_lo, s5, v3, vcc_lo
+; ALIGNED-NEXT:    s_clause 0xf
+; ALIGNED-NEXT:    global_load_dwordx4 v[16:19], v[24:25], off offset:240
+; ALIGNED-NEXT:    global_load_dwordx4 v[20:23], v[24:25], off offset:224
+; ALIGNED-NEXT:    global_load_dwordx4 v[4:7], v[24:25], off
+; ALIGNED-NEXT:    global_load_dwordx4 v[8:11], v[24:25], off offset:16
+; ALIGNED-NEXT:    global_load_dwordx4 v[12:15], v[24:25], off offset:32
+; ALIGNED-NEXT:    global_load_dwordx4 v[112:115], v[24:25], off offset:48
+; ALIGNED-NEXT:    global_load_dwordx4 v[116:119], v[24:25], off offset:64
+; ALIGNED-NEXT:    global_load_dwordx4 v[40:43], v[24:25], off offset:80
+; ALIGNED-NEXT:    global_load_dwordx4 v[26:29], v[24:25], off offset:96
+; ALIGNED-NEXT:    global_load_dwordx4 v[32:35], v[24:25], off offset:112
+; ALIGNED-NEXT:    global_load_dwordx4 v[44:47], v[24:25], off offset:128
+; ALIGNED-NEXT:    global_load_dwordx4 v[52:55], v[24:25], off offset:144
+; ALIGNED-NEXT:    global_load_dwordx4 v[66:69], v[24:25], off offset:160
+; ALIGNED-NEXT:    global_load_dwordx4 v[81:84], v[24:25], off offset:176
+; ALIGNED-NEXT:    global_load_dwordx4 v[96:99], v[24:25], off offset:192
+; ALIGNED-NEXT:    global_load_dwordx4 v[100:103], v[24:25], off offset:208
+; ALIGNED-NEXT:    s_waitcnt vmcnt(15)
+; ALIGNED-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0x100
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, 0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v31, off offset:254
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:252
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v30, off offset:250
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:248
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v25, off offset:246
+; ALIGNED-NEXT:    global_store_byte v[16:17], v25, off offset:244
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v24, off offset:242
+; ALIGNED-NEXT:    global_store_byte v[16:17], v24, off offset:240
+; ALIGNED-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    buffer_store_dword v21, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_store_dword v22, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_store_dword v23, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v51, off offset:238
+; ALIGNED-NEXT:    global_store_byte v[16:17], v51, off offset:236
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v50, off offset:234
+; ALIGNED-NEXT:    global_store_byte v[16:17], v50, off offset:232
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v49, off offset:230
+; ALIGNED-NEXT:    global_store_byte v[16:17], v49, off offset:228
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v36, off offset:226
+; ALIGNED-NEXT:    global_store_byte v[16:17], v36, off offset:224
+; ALIGNED-NEXT:    buffer_store_dword v100, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    buffer_store_dword v101, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_store_dword v102, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_store_dword v103, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v71, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    buffer_load_dword v70, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_load_dword v65, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_load_dword v64, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v31
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 8, v31
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v30
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 8, v30
+; ALIGNED-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v71, off offset:222
+; ALIGNED-NEXT:    global_store_byte v[16:17], v71, off offset:220
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v70, off offset:218
+; ALIGNED-NEXT:    global_store_byte v[16:17], v70, off offset:216
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v65, off offset:214
+; ALIGNED-NEXT:    global_store_byte v[16:17], v65, off offset:212
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v64, off offset:210
+; ALIGNED-NEXT:    global_store_byte v[16:17], v64, off offset:208
+; ALIGNED-NEXT:    buffer_store_dword v96, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    buffer_store_dword v97, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_store_dword v98, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_store_dword v99, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v87, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    buffer_load_dword v86, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_load_dword v85, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_load_dword v80, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v87, off offset:206
+; ALIGNED-NEXT:    global_store_byte v[16:17], v87, off offset:204
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v86, off offset:202
+; ALIGNED-NEXT:    global_store_byte v[16:17], v86, off offset:200
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v85, off offset:198
+; ALIGNED-NEXT:    global_store_byte v[16:17], v85, off offset:196
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v80, off offset:194
+; ALIGNED-NEXT:    global_store_byte v[16:17], v80, off offset:192
+; ALIGNED-NEXT:    buffer_store_dword v81, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    buffer_store_dword v82, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_store_dword v83, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_store_dword v84, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v101, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    buffer_load_dword v99, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_load_dword v96, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_load_dword v81, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v101, off offset:190
+; ALIGNED-NEXT:    global_store_byte v[16:17], v101, off offset:188
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v99, off offset:186
+; ALIGNED-NEXT:    global_store_byte v[16:17], v99, off offset:184
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v96, off offset:182
+; ALIGNED-NEXT:    global_store_byte v[16:17], v96, off offset:180
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v81, off offset:178
+; ALIGNED-NEXT:    global_store_byte v[16:17], v81, off offset:176
+; ALIGNED-NEXT:    buffer_store_dword v66, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    buffer_store_dword v67, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_store_dword v68, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_store_dword v69, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v100, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    buffer_load_dword v97, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_load_dword v82, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_load_dword v66, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v100, off offset:174
+; ALIGNED-NEXT:    global_store_byte v[16:17], v100, off offset:172
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v97, off offset:170
+; ALIGNED-NEXT:    global_store_byte v[16:17], v97, off offset:168
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v82, off offset:166
+; ALIGNED-NEXT:    global_store_byte v[16:17], v82, off offset:164
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v66, off offset:162
+; ALIGNED-NEXT:    global_store_byte v[16:17], v66, off offset:160
+; ALIGNED-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_store_dword v54, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_store_dword v55, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v98, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    buffer_load_dword v83, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v98, off offset:158
+; ALIGNED-NEXT:    global_store_byte v[16:17], v98, off offset:156
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v83, off offset:154
+; ALIGNED-NEXT:    global_store_byte v[16:17], v83, off offset:152
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v67, off offset:150
+; ALIGNED-NEXT:    global_store_byte v[16:17], v67, off offset:148
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v52, off offset:146
+; ALIGNED-NEXT:    global_store_byte v[16:17], v52, off offset:144
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v84, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    buffer_load_dword v68, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v84, off offset:142
+; ALIGNED-NEXT:    global_store_byte v[16:17], v84, off offset:140
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v68, off offset:138
+; ALIGNED-NEXT:    global_store_byte v[16:17], v68, off offset:136
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v53, off offset:134
+; ALIGNED-NEXT:    global_store_byte v[16:17], v53, off offset:132
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v37, off offset:130
+; ALIGNED-NEXT:    global_store_byte v[16:17], v37, off offset:128
+; ALIGNED-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:192
+; ALIGNED-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:196
+; ALIGNED-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:200
+; ALIGNED-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:204
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v69, off, s[0:3], s32 offset:204
+; ALIGNED-NEXT:    buffer_load_dword v54, off, s[0:3], s32 offset:200
+; ALIGNED-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:196
+; ALIGNED-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:192
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v69, off offset:126
+; ALIGNED-NEXT:    global_store_byte v[16:17], v69, off offset:124
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v54, off offset:122
+; ALIGNED-NEXT:    global_store_byte v[16:17], v54, off offset:120
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v38, off offset:118
+; ALIGNED-NEXT:    global_store_byte v[16:17], v38, off offset:116
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v32, off offset:114
+; ALIGNED-NEXT:    global_store_byte v[16:17], v32, off offset:112
+; ALIGNED-NEXT:    buffer_store_dword v26, off, s[0:3], s32 offset:208
+; ALIGNED-NEXT:    buffer_store_dword v27, off, s[0:3], s32 offset:212
+; ALIGNED-NEXT:    buffer_store_dword v28, off, s[0:3], s32 offset:216
+; ALIGNED-NEXT:    buffer_store_dword v29, off, s[0:3], s32 offset:220
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v55, off, s[0:3], s32 offset:220
+; ALIGNED-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:216
+; ALIGNED-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:212
+; ALIGNED-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:208
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v55, off offset:110
+; ALIGNED-NEXT:    global_store_byte v[16:17], v55, off offset:108
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v39, off offset:106
+; ALIGNED-NEXT:    global_store_byte v[16:17], v39, off offset:104
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v33, off offset:102
+; ALIGNED-NEXT:    global_store_byte v[16:17], v33, off offset:100
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v26, off offset:98
+; ALIGNED-NEXT:    global_store_byte v[16:17], v26, off offset:96
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v48, off offset:94
+; ALIGNED-NEXT:    global_store_byte v[16:17], v48, off offset:92
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v34, off offset:90
+; ALIGNED-NEXT:    global_store_byte v[16:17], v34, off offset:88
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v27, off offset:86
+; ALIGNED-NEXT:    global_store_byte v[16:17], v27, off offset:84
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v21, off offset:82
+; ALIGNED-NEXT:    global_store_byte v[16:17], v21, off offset:80
+; ALIGNED-NEXT:    buffer_store_dword v116, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    buffer_store_dword v117, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_store_dword v118, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_store_dword v119, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v35, off offset:78
+; ALIGNED-NEXT:    global_store_byte v[16:17], v35, off offset:76
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v28, off offset:74
+; ALIGNED-NEXT:    global_store_byte v[16:17], v28, off offset:72
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v22, off offset:70
+; ALIGNED-NEXT:    global_store_byte v[16:17], v22, off offset:68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v19, off offset:66
+; ALIGNED-NEXT:    global_store_byte v[16:17], v19, off offset:64
+; ALIGNED-NEXT:    buffer_store_dword v112, off, s[0:3], s32 offset:256
+; ALIGNED-NEXT:    buffer_store_dword v113, off, s[0:3], s32 offset:260
+; ALIGNED-NEXT:    buffer_store_dword v114, off, s[0:3], s32 offset:264
+; ALIGNED-NEXT:    buffer_store_dword v115, off, s[0:3], s32 offset:268
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v29, off, s[0:3], s32 offset:268
+; ALIGNED-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:264
+; ALIGNED-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:260
+; ALIGNED-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:256
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 8, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 8, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v51
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 8, v51
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v50
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 8, v50
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v29, off offset:62
+; ALIGNED-NEXT:    global_store_byte v[16:17], v29, off offset:60
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v23, off offset:58
+; ALIGNED-NEXT:    global_store_byte v[16:17], v23, off offset:56
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v20, off offset:54
+; ALIGNED-NEXT:    global_store_byte v[16:17], v20, off offset:52
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v18, off offset:50
+; ALIGNED-NEXT:    global_store_byte v[16:17], v18, off offset:48
+; ALIGNED-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:272
+; ALIGNED-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:276
+; ALIGNED-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:280
+; ALIGNED-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:284
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:280
+; ALIGNED-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:284
+; ALIGNED-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:272
+; ALIGNED-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:276
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v15, off offset:42
+; ALIGNED-NEXT:    global_store_byte v[16:17], v15, off offset:40
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v14, off offset:46
+; ALIGNED-NEXT:    global_store_byte v[16:17], v14, off offset:44
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v13, off offset:34
+; ALIGNED-NEXT:    global_store_byte v[16:17], v13, off offset:32
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v12, off offset:38
+; ALIGNED-NEXT:    global_store_byte v[16:17], v12, off offset:36
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:224
+; ALIGNED-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:228
+; ALIGNED-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:232
+; ALIGNED-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:236
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:236
+; ALIGNED-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:232
+; ALIGNED-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:228
+; ALIGNED-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:224
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v11, off offset:30
+; ALIGNED-NEXT:    global_store_byte v[16:17], v11, off offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v10, off offset:26
+; ALIGNED-NEXT:    global_store_byte v[16:17], v10, off offset:24
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v9, off offset:22
+; ALIGNED-NEXT:    global_store_byte v[16:17], v9, off offset:20
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v8, off offset:18
+; ALIGNED-NEXT:    global_store_byte v[16:17], v8, off offset:16
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:240
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:244
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:248
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:252
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:252
+; ALIGNED-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:248
+; ALIGNED-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:244
+; ALIGNED-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:240
+; ALIGNED-NEXT:    global_store_byte v[16:17], v112, off offset:247
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v65
+; ALIGNED-NEXT:    global_store_byte v[16:17], v102, off offset:255
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v49
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 8, v49
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:253
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v36
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 8, v36
+; ALIGNED-NEXT:    global_store_byte v[16:17], v103, off offset:251
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v71
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 8, v71
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:249
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v70
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 8, v70
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 8, v65
+; ALIGNED-NEXT:    global_store_byte v[16:17], v25, off offset:245
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 24, v64
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 8, v64
+; ALIGNED-NEXT:    global_store_byte v[16:17], v112, off offset:215
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v67
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 8, v67
+; ALIGNED-NEXT:    global_store_byte v[16:17], v113, off offset:243
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v87
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v87
+; ALIGNED-NEXT:    global_store_byte v[16:17], v24, off offset:241
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v86
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 8, v86
+; ALIGNED-NEXT:    global_store_byte v[16:17], v114, off offset:239
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v85
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v85
+; ALIGNED-NEXT:    global_store_byte v[16:17], v51, off offset:237
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 24, v80
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 8, v80
+; ALIGNED-NEXT:    global_store_byte v[16:17], v115, off offset:235
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 8, v101
+; ALIGNED-NEXT:    global_store_byte v[16:17], v50, off offset:233
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 24, v99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 8, v99
+; ALIGNED-NEXT:    global_store_byte v[16:17], v102, off offset:231
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v96
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v96, 8, v96
+; ALIGNED-NEXT:    global_store_byte v[16:17], v49, off offset:229
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 24, v81
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v81
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:227
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v100
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v100
+; ALIGNED-NEXT:    global_store_byte v[16:17], v36, off offset:225
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v97
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v97, 8, v97
+; ALIGNED-NEXT:    global_store_byte v[16:17], v103, off offset:223
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v82
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 8, v82
+; ALIGNED-NEXT:    global_store_byte v[16:17], v71, off offset:221
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 24, v66
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 8, v66
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:219
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v98
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v98
+; ALIGNED-NEXT:    global_store_byte v[16:17], v70, off offset:217
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v83
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v83
+; ALIGNED-NEXT:    global_store_byte v[16:17], v65, off offset:213
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 24, v52
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 8, v52
+; ALIGNED-NEXT:    global_store_byte v[16:17], v25, off offset:211
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 24, v84
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 8, v84
+; ALIGNED-NEXT:    global_store_byte v[16:17], v64, off offset:209
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 24, v68
+; ALIGNED-NEXT:    global_store_byte v[16:17], v67, off offset:149
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 24, v8
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v8, 8, v8
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 8, v68
+; ALIGNED-NEXT:    global_store_byte v[16:17], v113, off offset:207
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v53
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 8, v53
+; ALIGNED-NEXT:    global_store_byte v[16:17], v87, off offset:205
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 24, v37
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v37, 8, v37
+; ALIGNED-NEXT:    global_store_byte v[16:17], v24, off offset:203
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v69
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 8, v69
+; ALIGNED-NEXT:    global_store_byte v[16:17], v86, off offset:201
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v54
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v54, 8, v54
+; ALIGNED-NEXT:    global_store_byte v[16:17], v114, off offset:199
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v38
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v38, 8, v38
+; ALIGNED-NEXT:    global_store_byte v[16:17], v85, off offset:197
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 24, v32
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v32, 8, v32
+; ALIGNED-NEXT:    global_store_byte v[16:17], v51, off offset:195
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 24, v55
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v55, 8, v55
+; ALIGNED-NEXT:    global_store_byte v[16:17], v80, off offset:193
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v39
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v39, 8, v39
+; ALIGNED-NEXT:    global_store_byte v[16:17], v115, off offset:191
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v33
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v33, 8, v33
+; ALIGNED-NEXT:    global_store_byte v[16:17], v101, off offset:189
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v26
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v26, 8, v26
+; ALIGNED-NEXT:    global_store_byte v[16:17], v50, off offset:187
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 24, v48
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 8, v48
+; ALIGNED-NEXT:    global_store_byte v[16:17], v99, off offset:185
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v34
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v34, 8, v34
+; ALIGNED-NEXT:    global_store_byte v[16:17], v102, off offset:183
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v27
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v27, 8, v27
+; ALIGNED-NEXT:    global_store_byte v[16:17], v96, off offset:181
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v96, 24, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v21, 8, v21
+; ALIGNED-NEXT:    global_store_byte v[16:17], v49, off offset:179
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 24, v35
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v35, 8, v35
+; ALIGNED-NEXT:    global_store_byte v[16:17], v81, off offset:177
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 24, v28
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 8, v28
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:175
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v22
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v22, 8, v22
+; ALIGNED-NEXT:    global_store_byte v[16:17], v100, off offset:173
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 24, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v19, 8, v19
+; ALIGNED-NEXT:    global_store_byte v[16:17], v36, off offset:171
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v29
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 8, v29
+; ALIGNED-NEXT:    global_store_byte v[16:17], v97, off offset:169
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v97, 24, v23
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v23, 8, v23
+; ALIGNED-NEXT:    global_store_byte v[16:17], v103, off offset:167
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v20
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v20, 8, v20
+; ALIGNED-NEXT:    global_store_byte v[16:17], v82, off offset:165
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v18, 8, v18
+; ALIGNED-NEXT:    global_store_byte v[16:17], v71, off offset:163
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 24, v15
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v15, 8, v15
+; ALIGNED-NEXT:    global_store_byte v[16:17], v66, off offset:161
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 24, v14
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v14, 8, v14
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:159
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v13
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v13, 8, v13
+; ALIGNED-NEXT:    global_store_byte v[16:17], v98, off offset:157
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 24, v12
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v12, 8, v12
+; ALIGNED-NEXT:    global_store_byte v[16:17], v70, off offset:155
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v11
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v11, 8, v11
+; ALIGNED-NEXT:    global_store_byte v[16:17], v83, off offset:153
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 24, v10
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v10, 8, v10
+; ALIGNED-NEXT:    global_store_byte v[16:17], v112, off offset:151
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v9
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v9, 8, v9
+; ALIGNED-NEXT:    global_store_byte v[16:17], v65, off offset:147
+; ALIGNED-NEXT:    global_store_byte v[16:17], v52, off offset:145
+; ALIGNED-NEXT:    global_store_byte v[16:17], v25, off offset:143
+; ALIGNED-NEXT:    global_store_byte v[16:17], v84, off offset:141
+; ALIGNED-NEXT:    global_store_byte v[16:17], v64, off offset:139
+; ALIGNED-NEXT:    global_store_byte v[16:17], v68, off offset:137
+; ALIGNED-NEXT:    global_store_byte v[16:17], v113, off offset:135
+; ALIGNED-NEXT:    global_store_byte v[16:17], v53, off offset:133
+; ALIGNED-NEXT:    global_store_byte v[16:17], v87, off offset:131
+; ALIGNED-NEXT:    global_store_byte v[16:17], v37, off offset:129
+; ALIGNED-NEXT:    global_store_byte v[16:17], v24, off offset:127
+; ALIGNED-NEXT:    global_store_byte v[16:17], v69, off offset:125
+; ALIGNED-NEXT:    global_store_byte v[16:17], v86, off offset:123
+; ALIGNED-NEXT:    global_store_byte v[16:17], v54, off offset:121
+; ALIGNED-NEXT:    global_store_byte v[16:17], v114, off offset:119
+; ALIGNED-NEXT:    global_store_byte v[16:17], v38, off offset:117
+; ALIGNED-NEXT:    global_store_byte v[16:17], v85, off offset:115
+; ALIGNED-NEXT:    global_store_byte v[16:17], v32, off offset:113
+; ALIGNED-NEXT:    global_store_byte v[16:17], v51, off offset:111
+; ALIGNED-NEXT:    global_store_byte v[16:17], v55, off offset:109
+; ALIGNED-NEXT:    global_store_byte v[16:17], v80, off offset:107
+; ALIGNED-NEXT:    global_store_byte v[16:17], v39, off offset:105
+; ALIGNED-NEXT:    global_store_byte v[16:17], v115, off offset:103
+; ALIGNED-NEXT:    global_store_byte v[16:17], v33, off offset:101
+; ALIGNED-NEXT:    global_store_byte v[16:17], v101, off offset:99
+; ALIGNED-NEXT:    global_store_byte v[16:17], v26, off offset:97
+; ALIGNED-NEXT:    global_store_byte v[16:17], v50, off offset:95
+; ALIGNED-NEXT:    global_store_byte v[16:17], v48, off offset:93
+; ALIGNED-NEXT:    global_store_byte v[16:17], v99, off offset:91
+; ALIGNED-NEXT:    global_store_byte v[16:17], v34, off offset:89
+; ALIGNED-NEXT:    global_store_byte v[16:17], v102, off offset:87
+; ALIGNED-NEXT:    global_store_byte v[16:17], v27, off offset:85
+; ALIGNED-NEXT:    global_store_byte v[16:17], v96, off offset:83
+; ALIGNED-NEXT:    global_store_byte v[16:17], v21, off offset:81
+; ALIGNED-NEXT:    global_store_byte v[16:17], v49, off offset:79
+; ALIGNED-NEXT:    global_store_byte v[16:17], v35, off offset:77
+; ALIGNED-NEXT:    global_store_byte v[16:17], v81, off offset:75
+; ALIGNED-NEXT:    global_store_byte v[16:17], v28, off offset:73
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:71
+; ALIGNED-NEXT:    global_store_byte v[16:17], v22, off offset:69
+; ALIGNED-NEXT:    global_store_byte v[16:17], v100, off offset:67
+; ALIGNED-NEXT:    global_store_byte v[16:17], v19, off offset:65
+; ALIGNED-NEXT:    global_store_byte v[16:17], v36, off offset:63
+; ALIGNED-NEXT:    global_store_byte v[16:17], v29, off offset:61
+; ALIGNED-NEXT:    global_store_byte v[16:17], v97, off offset:59
+; ALIGNED-NEXT:    global_store_byte v[16:17], v23, off offset:57
+; ALIGNED-NEXT:    global_store_byte v[16:17], v103, off offset:55
+; ALIGNED-NEXT:    global_store_byte v[16:17], v20, off offset:53
+; ALIGNED-NEXT:    global_store_byte v[16:17], v82, off offset:51
+; ALIGNED-NEXT:    global_store_byte v[16:17], v18, off offset:49
+; ALIGNED-NEXT:    global_store_byte v[16:17], v71, off offset:43
+; ALIGNED-NEXT:    global_store_byte v[16:17], v15, off offset:41
+; ALIGNED-NEXT:    global_store_byte v[16:17], v66, off offset:47
+; ALIGNED-NEXT:    global_store_byte v[16:17], v14, off offset:45
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:35
+; ALIGNED-NEXT:    global_store_byte v[16:17], v13, off offset:33
+; ALIGNED-NEXT:    global_store_byte v[16:17], v98, off offset:39
+; ALIGNED-NEXT:    global_store_byte v[16:17], v12, off offset:37
+; ALIGNED-NEXT:    global_store_byte v[16:17], v70, off offset:31
+; ALIGNED-NEXT:    global_store_byte v[16:17], v11, off offset:29
+; ALIGNED-NEXT:    global_store_byte v[16:17], v83, off offset:27
+; ALIGNED-NEXT:    global_store_byte v[16:17], v10, off offset:25
+; ALIGNED-NEXT:    global_store_byte v[16:17], v112, off offset:23
+; ALIGNED-NEXT:    global_store_byte v[16:17], v9, off offset:21
+; ALIGNED-NEXT:    global_store_byte v[16:17], v67, off offset:19
+; ALIGNED-NEXT:    global_store_byte v[16:17], v8, off offset:17
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v7, off offset:14
+; ALIGNED-NEXT:    global_store_byte v[16:17], v7, off offset:12
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v6, off offset:10
+; ALIGNED-NEXT:    global_store_byte v[16:17], v6, off offset:8
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v5, off offset:6
+; ALIGNED-NEXT:    global_store_byte v[16:17], v5, off offset:4
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v4, off offset:2
+; ALIGNED-NEXT:    global_store_byte v[16:17], v4, off
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v8, 24, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v9, 24, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v6, 8, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v10, 24, v5
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v5, 8, v5
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v11, 24, v4
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v4, 8, v4
+; ALIGNED-NEXT:    global_store_byte v[16:17], v8, off offset:15
+; ALIGNED-NEXT:    global_store_byte v[16:17], v7, off offset:13
+; ALIGNED-NEXT:    global_store_byte v[16:17], v9, off offset:11
+; ALIGNED-NEXT:    global_store_byte v[16:17], v6, off offset:9
+; ALIGNED-NEXT:    global_store_byte v[16:17], v10, off offset:7
+; ALIGNED-NEXT:    global_store_byte v[16:17], v5, off offset:5
+; ALIGNED-NEXT:    global_store_byte v[16:17], v11, off offset:3
+; ALIGNED-NEXT:    global_store_byte v[16:17], v4, off offset:1
+; ALIGNED-NEXT:    s_cbranch_vccnz .LBB1_1
+; ALIGNED-NEXT:  ; %bb.2: ; %memcpy-split
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_dword v47, off, s[0:3], s32
+; ALIGNED-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:4
+; ALIGNED-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:8
+; ALIGNED-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:12
+; ALIGNED-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:16
+; ALIGNED-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:20
+; ALIGNED-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:24
+; ALIGNED-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    s_setpc_b64 s[30:31]
+;
+; UNROLL3-LABEL: memcpy_p1_p1_sz2048:
+; UNROLL3:       ; %bb.0: ; %entry
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0
+; UNROLL3-NEXT:    .p2align 6
+; UNROLL3-NEXT:  .LBB1_1: ; %load-store-loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    v_add_co_u32 v12, vcc_lo, v2, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo
+; UNROLL3-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 48
+; UNROLL3-NEXT:    s_clause 0x2
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[12:13], off
+; UNROLL3-NEXT:    global_load_dwordx4 v[8:11], v[12:13], off offset:16
+; UNROLL3-NEXT:    global_load_dwordx4 v[12:15], v[12:13], off offset:32
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, 0
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    global_store_dwordx4 v[16:17], v[4:7], off
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    global_store_dwordx4 v[16:17], v[8:11], off offset:16
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    global_store_dwordx4 v[16:17], v[12:15], off offset:32
+; UNROLL3-NEXT:    v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5]
+; UNROLL3-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; UNROLL3-NEXT:    s_cbranch_vccnz .LBB1_1
+; UNROLL3-NEXT:  ; %bb.2: ; %memcpy-split
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:2016
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2016
+; UNROLL3-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off offset:2032
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:2032
+; UNROLL3-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 2048, i1 false)
+  ret void
+}
+
+define void @memcpy_p0_p4_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
+; CHECK-LABEL: memcpy_p0_p4_sz2048:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:  .LBB2_1: ; %load-store-loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    v_add_co_u32 v96, vcc_lo, v2, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo
+; CHECK-NEXT:    v_add_co_u32 v100, vcc_lo, v0, s4
+; CHECK-NEXT:    s_add_u32 s4, s4, 0x100
+; CHECK-NEXT:    s_clause 0xf
+; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[96:97], off offset:240
+; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[96:97], off offset:224
+; CHECK-NEXT:    global_load_dwordx4 v[12:15], v[96:97], off offset:208
+; CHECK-NEXT:    global_load_dwordx4 v[16:19], v[96:97], off offset:192
+; CHECK-NEXT:    global_load_dwordx4 v[20:23], v[96:97], off offset:176
+; CHECK-NEXT:    global_load_dwordx4 v[24:27], v[96:97], off offset:160
+; CHECK-NEXT:    global_load_dwordx4 v[28:31], v[96:97], off offset:144
+; CHECK-NEXT:    global_load_dwordx4 v[32:35], v[96:97], off offset:128
+; CHECK-NEXT:    global_load_dwordx4 v[36:39], v[96:97], off offset:112
+; CHECK-NEXT:    global_load_dwordx4 v[48:51], v[96:97], off offset:96
+; CHECK-NEXT:    global_load_dwordx4 v[52:55], v[96:97], off offset:80
+; CHECK-NEXT:    global_load_dwordx4 v[64:67], v[96:97], off offset:64
+; CHECK-NEXT:    global_load_dwordx4 v[68:71], v[96:97], off offset:48
+; CHECK-NEXT:    global_load_dwordx4 v[80:83], v[96:97], off offset:32
+; CHECK-NEXT:    global_load_dwordx4 v[84:87], v[96:97], off offset:16
+; CHECK-NEXT:    global_load_dwordx4 v[96:99], v[96:97], off
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo
+; CHECK-NEXT:    s_addc_u32 s5, s5, 0
+; CHECK-NEXT:    s_waitcnt vmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[4:7] offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(14)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[8:11] offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(13)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[12:15] offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(12)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[16:19] offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(11)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[20:23] offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(10)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[24:27] offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(9)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[28:31] offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(8)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[32:35] offset:128
+; CHECK-NEXT:    s_waitcnt vmcnt(7)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[36:39] offset:112
+; CHECK-NEXT:    s_waitcnt vmcnt(6)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[48:51] offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(5)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[52:55] offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(4)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[64:67] offset:64
+; CHECK-NEXT:    s_waitcnt vmcnt(3)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[68:71] offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(2)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[80:83] offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[84:87] offset:16
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[96:99]
+; CHECK-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; CHECK-NEXT:    s_cbranch_vccnz .LBB2_1
+; CHECK-NEXT:  ; %bb.2: ; %memcpy-split
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+;
+; ALIGNED-LABEL: memcpy_p0_p4_sz2048:
+; ALIGNED:       ; %bb.0: ; %entry
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0
+; ALIGNED-NEXT:  .LBB2_1: ; %load-store-loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    v_add_co_u32 v4, vcc_lo, v2, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, s5, v3, vcc_lo
+; ALIGNED-NEXT:    v_add_co_u32 v96, vcc_lo, v0, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v97, vcc_lo, s5, v1, vcc_lo
+; ALIGNED-NEXT:    s_clause 0xf
+; ALIGNED-NEXT:    global_load_dwordx4 v[98:101], v[4:5], off offset:240
+; ALIGNED-NEXT:    global_load_dwordx4 v[84:87], v[4:5], off offset:224
+; ALIGNED-NEXT:    global_load_dwordx4 v[80:83], v[4:5], off offset:208
+; ALIGNED-NEXT:    global_load_dwordx4 v[68:71], v[4:5], off offset:192
+; ALIGNED-NEXT:    global_load_dwordx4 v[64:67], v[4:5], off offset:176
+; ALIGNED-NEXT:    global_load_dwordx4 v[52:55], v[4:5], off offset:160
+; ALIGNED-NEXT:    global_load_dwordx4 v[48:51], v[4:5], off offset:144
+; ALIGNED-NEXT:    global_load_dwordx4 v[36:39], v[4:5], off offset:128
+; ALIGNED-NEXT:    global_load_dwordx4 v[32:35], v[4:5], off offset:112
+; ALIGNED-NEXT:    global_load_dwordx4 v[28:31], v[4:5], off offset:96
+; ALIGNED-NEXT:    global_load_dwordx4 v[24:27], v[4:5], off offset:80
+; ALIGNED-NEXT:    global_load_dwordx4 v[20:23], v[4:5], off offset:64
+; ALIGNED-NEXT:    global_load_dwordx4 v[16:19], v[4:5], off offset:48
+; ALIGNED-NEXT:    global_load_dwordx4 v[12:15], v[4:5], off offset:32
+; ALIGNED-NEXT:    global_load_dwordx4 v[8:11], v[4:5], off offset:16
+; ALIGNED-NEXT:    global_load_dwordx4 v[4:7], v[4:5], off
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0x100
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, 0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(15)
+; ALIGNED-NEXT:    buffer_store_dword v100, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_store_dword v101, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    buffer_store_dword v99, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_store_dword v98, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v100 offset:250
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v101 offset:254
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:252
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:248
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v99 offset:246
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:244
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v98 offset:242
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:240
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v100
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v100
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 8, v101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v98
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v98
+; ALIGNED-NEXT:    s_waitcnt vmcnt(14)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v86
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v86
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:251
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v87
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:249
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v87
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:255
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v85
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:253
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 8, v85
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:247
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v84
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:245
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v84
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:243
+; ALIGNED-NEXT:    s_waitcnt vmcnt(13)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v82
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:241
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v82
+; ALIGNED-NEXT:    buffer_store_dword v86, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_store_dword v87, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    buffer_store_dword v85, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_store_dword v84, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v86 offset:234
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v87 offset:238
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:236
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:232
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v85 offset:230
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:228
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v84 offset:226
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:224
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 24, v83
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v83
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v81
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v81
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:235
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v80
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:233
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v80
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:239
+; ALIGNED-NEXT:    s_waitcnt vmcnt(12)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v70
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:237
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v70
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:231
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v71
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:229
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 8, v71
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:227
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v69
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:225
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v69
+; ALIGNED-NEXT:    buffer_store_dword v82, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_store_dword v83, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    buffer_store_dword v81, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_store_dword v80, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v82 offset:218
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v83 offset:222
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v83 offset:220
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v82 offset:216
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v81 offset:214
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v81 offset:212
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v80 offset:210
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v80 offset:208
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v68
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(11)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v66
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v66
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:219
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v67
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:217
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v67
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:223
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 24, v65
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:221
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v65
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:215
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v64
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:213
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v64
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:211
+; ALIGNED-NEXT:    s_waitcnt vmcnt(10)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v54
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:209
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v54
+; ALIGNED-NEXT:    buffer_store_dword v70, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_store_dword v71, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    buffer_store_dword v69, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_store_dword v68, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v70 offset:202
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v71 offset:206
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:204
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:200
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v69 offset:198
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:196
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v68 offset:194
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v68 offset:192
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 8, v55
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v52
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 8, v52
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 24, v55
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:203
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v53
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:201
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v53
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:207
+; ALIGNED-NEXT:    s_waitcnt vmcnt(9)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v50
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:205
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 8, v50
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:199
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v51
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:197
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v51
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v80 offset:195
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v49
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v81 offset:193
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v49
+; ALIGNED-NEXT:    buffer_store_dword v66, off, s[0:3], s32 offset:232
+; ALIGNED-NEXT:    buffer_store_dword v67, off, s[0:3], s32 offset:236
+; ALIGNED-NEXT:    buffer_store_dword v65, off, s[0:3], s32 offset:228
+; ALIGNED-NEXT:    buffer_store_dword v64, off, s[0:3], s32 offset:224
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v66 offset:186
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v67 offset:190
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v67 offset:188
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v66 offset:184
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v65 offset:182
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v65 offset:180
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v64 offset:178
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v64 offset:176
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 24, v48
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 8, v48
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v82 offset:187
+; ALIGNED-NEXT:    s_waitcnt vmcnt(8)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v39
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 24, v38
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 8, v38
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v83 offset:185
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v39
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:191
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v37
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:189
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v37
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:183
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 24, v36
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:181
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v36
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:179
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v34
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:177
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v34
+; ALIGNED-NEXT:    buffer_store_dword v55, off, s[0:3], s32 offset:252
+; ALIGNED-NEXT:    buffer_store_dword v54, off, s[0:3], s32 offset:248
+; ALIGNED-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:244
+; ALIGNED-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:240
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v54 offset:170
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v54 offset:168
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v55 offset:174
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v55 offset:172
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v52 offset:162
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v52 offset:160
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v53 offset:166
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v53 offset:164
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 24, v35
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v54, 8, v35
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v55, 24, v33
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v116, 8, v33
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:171
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v32
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:169
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v32
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:173
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 24, v31
+; ALIGNED-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:163
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 8, v31
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:161
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 24, v29
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v68 offset:175
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 24, v30
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 8, v30
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:167
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:165
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v29
+; ALIGNED-NEXT:    buffer_store_dword v50, off, s[0:3], s32 offset:200
+; ALIGNED-NEXT:    buffer_store_dword v51, off, s[0:3], s32 offset:204
+; ALIGNED-NEXT:    buffer_store_dword v49, off, s[0:3], s32 offset:196
+; ALIGNED-NEXT:    buffer_store_dword v48, off, s[0:3], s32 offset:192
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v50 offset:154
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v51 offset:158
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v51 offset:156
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v50 offset:152
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v49 offset:150
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v49 offset:148
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v48 offset:146
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v48 offset:144
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 24, v28
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 8, v28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 24, v26
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:155
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:153
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:159
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:157
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v80 offset:151
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v81 offset:149
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v64 offset:147
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v65 offset:145
+; ALIGNED-NEXT:    buffer_store_dword v38, off, s[0:3], s32 offset:216
+; ALIGNED-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:220
+; ALIGNED-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:212
+; ALIGNED-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:208
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v38 offset:138
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v39 offset:142
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v39 offset:140
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v38 offset:136
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v37 offset:134
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v37 offset:132
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v36 offset:130
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v36 offset:128
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v82 offset:143
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 8, v26
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v66 offset:139
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v67 offset:137
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v83 offset:141
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:135
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:133
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:131
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:129
+; ALIGNED-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v34 offset:122
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v35 offset:126
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v35 offset:124
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v34 offset:120
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v33 offset:118
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v33 offset:116
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v32 offset:114
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v32 offset:112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v34, 24, v14
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:123
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:121
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v53 offset:127
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v54 offset:125
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v55 offset:119
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v116 offset:117
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:115
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v10
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:113
+; ALIGNED-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    buffer_store_dword v29, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_store_dword v28, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v30 offset:106
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v31 offset:110
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v31 offset:108
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v30 offset:104
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v29 offset:102
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v29 offset:100
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v28 offset:98
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v28 offset:96
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:111
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 24, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v27
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v35, 8, v14
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v10
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:109
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 8, v6
+; ALIGNED-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 8, v27
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v15
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 24, v11
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:103
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 24, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 24, v22
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 8, v22
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v23
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v37, 8, v23
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v38, 24, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v39, 8, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 24, v20
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 8, v20
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 24, v17
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v17
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v32, 24, v16
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v33, 8, v16
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v15
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 24, v13
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v54, 8, v13
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v55, 24, v12
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 8, v12
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 8, v11
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v9
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 8, v9
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v52 offset:107
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 24, v8
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v68 offset:105
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 8, v8
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v7
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v48 offset:99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 24, v5
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v49 offset:97
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 8, v5
+; ALIGNED-NEXT:    buffer_store_dword v26, off, s[0:3], s32 offset:8
+; ALIGNED-NEXT:    buffer_store_dword v27, off, s[0:3], s32 offset:12
+; ALIGNED-NEXT:    buffer_store_dword v25, off, s[0:3], s32 offset:4
+; ALIGNED-NEXT:    buffer_store_dword v24, off, s[0:3], s32
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v26 offset:90
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v27 offset:94
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v27 offset:92
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v26 offset:88
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v25 offset:86
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v25 offset:84
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v24 offset:82
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v24 offset:80
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v4
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 8, v4
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v50 offset:91
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v51 offset:89
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:95
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:93
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:87
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:85
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v80 offset:83
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v81 offset:81
+; ALIGNED-NEXT:    buffer_store_dword v22, off, s[0:3], s32 offset:24
+; ALIGNED-NEXT:    buffer_store_dword v23, off, s[0:3], s32 offset:28
+; ALIGNED-NEXT:    buffer_store_dword v21, off, s[0:3], s32 offset:20
+; ALIGNED-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:16
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v22 offset:74
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v23 offset:78
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v23 offset:76
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v22 offset:72
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v21 offset:70
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v21 offset:68
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v20 offset:66
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v20 offset:64
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v64 offset:75
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v65 offset:73
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v36 offset:79
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v37 offset:77
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v38 offset:71
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v39 offset:69
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v66 offset:67
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v67 offset:65
+; ALIGNED-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v82 offset:59
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v18 offset:58
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v83 offset:57
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v19 offset:62
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:63
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v19 offset:60
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:61
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v18 offset:56
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:55
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v17 offset:54
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:53
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v17 offset:52
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v32 offset:51
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v16 offset:50
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v33 offset:49
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v16 offset:48
+; ALIGNED-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v14 offset:42
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v34 offset:43
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v35 offset:41
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v15 offset:46
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:47
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v15 offset:44
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:45
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v14 offset:40
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v53 offset:39
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v13 offset:38
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v54 offset:37
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v13 offset:36
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v55 offset:35
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v12 offset:34
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:33
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v12 offset:32
+; ALIGNED-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v10 offset:26
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:27
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:25
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v11 offset:30
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v28 offset:31
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v11 offset:28
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v29 offset:29
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v10 offset:24
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v30 offset:23
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v9 offset:22
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v31 offset:21
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v9 offset:20
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v52 offset:19
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v8 offset:18
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v68 offset:17
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v8 offset:16
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v6 offset:10
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:11
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:9
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v7 offset:14
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:15
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v7 offset:12
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:13
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v6 offset:8
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v48 offset:7
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v5 offset:6
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v49 offset:5
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v5 offset:4
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v24 offset:3
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v4 offset:2
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v25 offset:1
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v4
+; ALIGNED-NEXT:    s_cbranch_vccnz .LBB2_1
+; ALIGNED-NEXT:  ; %bb.2: ; %memcpy-split
+; ALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
+; ALIGNED-NEXT:    s_setpc_b64 s[30:31]
+;
+; UNROLL3-LABEL: memcpy_p0_p4_sz2048:
+; UNROLL3:       ; %bb.0: ; %entry
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0
+; UNROLL3-NEXT:    .p2align 6
+; UNROLL3-NEXT:  .LBB2_1: ; %load-store-loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    v_add_co_u32 v12, vcc_lo, v2, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo
+; UNROLL3-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 48
+; UNROLL3-NEXT:    s_clause 0x2
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[12:13], off offset:16
+; UNROLL3-NEXT:    global_load_dwordx4 v[8:11], v[12:13], off
+; UNROLL3-NEXT:    global_load_dwordx4 v[12:15], v[12:13], off offset:32
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, 0
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[4:7] offset:16
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[12:15] offset:32
+; UNROLL3-NEXT:    v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5]
+; UNROLL3-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; UNROLL3-NEXT:    s_cbranch_vccnz .LBB2_1
+; UNROLL3-NEXT:  ; %bb.2: ; %memcpy-split
+; UNROLL3-NEXT:    s_clause 0x1
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:2016
+; UNROLL3-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off offset:2032
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:2016
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[8:11] offset:2032
+; UNROLL3-NEXT:    s_waitcnt lgkmcnt(0)
+; UNROLL3-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 2048, i1 false)
+  ret void
+}
+
+define void @memcpy_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
+; CHECK-LABEL: memcpy_p5_p5_sz2048:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:  .LBB3_1: ; %load-store-loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_clause 0x3e
+; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:252
+; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:248
+; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:244
+; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:240
+; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:236
+; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:232
+; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:228
+; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:224
+; CHECK-NEXT:    buffer_load_dword v10, v1, s[0:3], 0 offen offset:220
+; CHECK-NEXT:    buffer_load_dword v11, v1, s[0:3], 0 offen offset:216
+; CHECK-NEXT:    buffer_load_dword v12, v1, s[0:3], 0 offen offset:212
+; CHECK-NEXT:    buffer_load_dword v13, v1, s[0:3], 0 offen offset:208
+; CHECK-NEXT:    buffer_load_dword v14, v1, s[0:3], 0 offen offset:204
+; CHECK-NEXT:    buffer_load_dword v15, v1, s[0:3], 0 offen offset:200
+; CHECK-NEXT:    buffer_load_dword v16, v1, s[0:3], 0 offen offset:196
+; CHECK-NEXT:    buffer_load_dword v17, v1, s[0:3], 0 offen offset:192
+; CHECK-NEXT:    buffer_load_dword v18, v1, s[0:3], 0 offen offset:188
+; CHECK-NEXT:    buffer_load_dword v19, v1, s[0:3], 0 offen offset:184
+; CHECK-NEXT:    buffer_load_dword v20, v1, s[0:3], 0 offen offset:180
+; CHECK-NEXT:    buffer_load_dword v21, v1, s[0:3], 0 offen offset:176
+; CHECK-NEXT:    buffer_load_dword v22, v1, s[0:3], 0 offen offset:172
+; CHECK-NEXT:    buffer_load_dword v23, v1, s[0:3], 0 offen offset:168
+; CHECK-NEXT:    buffer_load_dword v24, v1, s[0:3], 0 offen offset:164
+; CHECK-NEXT:    buffer_load_dword v25, v1, s[0:3], 0 offen offset:160
+; CHECK-NEXT:    buffer_load_dword v26, v1, s[0:3], 0 offen offset:156
+; CHECK-NEXT:    buffer_load_dword v27, v1, s[0:3], 0 offen offset:152
+; CHECK-NEXT:    buffer_load_dword v28, v1, s[0:3], 0 offen offset:148
+; CHECK-NEXT:    buffer_load_dword v29, v1, s[0:3], 0 offen offset:144
+; CHECK-NEXT:    buffer_load_dword v30, v1, s[0:3], 0 offen offset:140
+; CHECK-NEXT:    buffer_load_dword v31, v1, s[0:3], 0 offen offset:136
+; CHECK-NEXT:    buffer_load_dword v32, v1, s[0:3], 0 offen offset:132
+; CHECK-NEXT:    buffer_load_dword v33, v1, s[0:3], 0 offen offset:128
+; CHECK-NEXT:    buffer_load_dword v34, v1, s[0:3], 0 offen offset:124
+; CHECK-NEXT:    buffer_load_dword v35, v1, s[0:3], 0 offen offset:120
+; CHECK-NEXT:    buffer_load_dword v36, v1, s[0:3], 0 offen offset:116
+; CHECK-NEXT:    buffer_load_dword v37, v1, s[0:3], 0 offen offset:112
+; CHECK-NEXT:    buffer_load_dword v38, v1, s[0:3], 0 offen offset:108
+; CHECK-NEXT:    buffer_load_dword v39, v1, s[0:3], 0 offen offset:104
+; CHECK-NEXT:    buffer_load_dword v48, v1, s[0:3], 0 offen offset:100
+; CHECK-NEXT:    buffer_load_dword v49, v1, s[0:3], 0 offen offset:96
+; CHECK-NEXT:    buffer_load_dword v50, v1, s[0:3], 0 offen offset:92
+; CHECK-NEXT:    buffer_load_dword v51, v1, s[0:3], 0 offen offset:88
+; CHECK-NEXT:    buffer_load_dword v52, v1, s[0:3], 0 offen offset:84
+; CHECK-NEXT:    buffer_load_dword v53, v1, s[0:3], 0 offen offset:80
+; CHECK-NEXT:    buffer_load_dword v54, v1, s[0:3], 0 offen offset:76
+; CHECK-NEXT:    buffer_load_dword v55, v1, s[0:3], 0 offen offset:72
+; CHECK-NEXT:    buffer_load_dword v64, v1, s[0:3], 0 offen offset:68
+; CHECK-NEXT:    buffer_load_dword v65, v1, s[0:3], 0 offen offset:64
+; CHECK-NEXT:    buffer_load_dword v66, v1, s[0:3], 0 offen offset:60
+; CHECK-NEXT:    buffer_load_dword v67, v1, s[0:3], 0 offen offset:56
+; CHECK-NEXT:    buffer_load_dword v68, v1, s[0:3], 0 offen offset:52
+; CHECK-NEXT:    buffer_load_dword v69, v1, s[0:3], 0 offen offset:48
+; CHECK-NEXT:    buffer_load_dword v70, v1, s[0:3], 0 offen offset:44
+; CHECK-NEXT:    buffer_load_dword v71, v1, s[0:3], 0 offen offset:40
+; CHECK-NEXT:    buffer_load_dword v80, v1, s[0:3], 0 offen offset:36
+; CHECK-NEXT:    buffer_load_dword v81, v1, s[0:3], 0 offen offset:32
+; CHECK-NEXT:    buffer_load_dword v82, v1, s[0:3], 0 offen offset:28
+; CHECK-NEXT:    buffer_load_dword v83, v1, s[0:3], 0 offen offset:24
+; CHECK-NEXT:    buffer_load_dword v84, v1, s[0:3], 0 offen offset:20
+; CHECK-NEXT:    buffer_load_dword v85, v1, s[0:3], 0 offen offset:16
+; CHECK-NEXT:    buffer_load_dword v86, v1, s[0:3], 0 offen offset:12
+; CHECK-NEXT:    buffer_load_dword v87, v1, s[0:3], 0 offen offset:8
+; CHECK-NEXT:    buffer_load_dword v96, v1, s[0:3], 0 offen offset:4
+; CHECK-NEXT:    buffer_load_dword v97, v1, s[0:3], 0 offen
+; CHECK-NEXT:    s_add_u32 s4, s4, 0x100
+; CHECK-NEXT:    s_addc_u32 s5, s5, 0
+; CHECK-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
+; CHECK-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; CHECK-NEXT:    s_waitcnt vmcnt(62)
+; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:252
+; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:248
+; CHECK-NEXT:    s_waitcnt vmcnt(61)
+; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:244
+; CHECK-NEXT:    s_waitcnt vmcnt(60)
+; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(59)
+; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:236
+; CHECK-NEXT:    s_waitcnt vmcnt(58)
+; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:232
+; CHECK-NEXT:    s_waitcnt vmcnt(57)
+; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:228
+; CHECK-NEXT:    s_waitcnt vmcnt(56)
+; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(55)
+; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:220
+; CHECK-NEXT:    s_waitcnt vmcnt(54)
+; CHECK-NEXT:    buffer_store_dword v11, v0, s[0:3], 0 offen offset:216
+; CHECK-NEXT:    s_waitcnt vmcnt(53)
+; CHECK-NEXT:    buffer_store_dword v12, v0, s[0:3], 0 offen offset:212
+; CHECK-NEXT:    s_waitcnt vmcnt(52)
+; CHECK-NEXT:    buffer_store_dword v13, v0, s[0:3], 0 offen offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(51)
+; CHECK-NEXT:    buffer_store_dword v14, v0, s[0:3], 0 offen offset:204
+; CHECK-NEXT:    s_waitcnt vmcnt(50)
+; CHECK-NEXT:    buffer_store_dword v15, v0, s[0:3], 0 offen offset:200
+; CHECK-NEXT:    s_waitcnt vmcnt(49)
+; CHECK-NEXT:    buffer_store_dword v16, v0, s[0:3], 0 offen offset:196
+; CHECK-NEXT:    s_waitcnt vmcnt(48)
+; CHECK-NEXT:    buffer_store_dword v17, v0, s[0:3], 0 offen offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(47)
+; CHECK-NEXT:    buffer_store_dword v18, v0, s[0:3], 0 offen offset:188
+; CHECK-NEXT:    s_waitcnt vmcnt(46)
+; CHECK-NEXT:    buffer_store_dword v19, v0, s[0:3], 0 offen offset:184
+; CHECK-NEXT:    s_waitcnt vmcnt(45)
+; CHECK-NEXT:    buffer_store_dword v20, v0, s[0:3], 0 offen offset:180
+; CHECK-NEXT:    s_waitcnt vmcnt(44)
+; CHECK-NEXT:    buffer_store_dword v21, v0, s[0:3], 0 offen offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(43)
+; CHECK-NEXT:    buffer_store_dword v22, v0, s[0:3], 0 offen offset:172
+; CHECK-NEXT:    s_waitcnt vmcnt(42)
+; CHECK-NEXT:    buffer_store_dword v23, v0, s[0:3], 0 offen offset:168
+; CHECK-NEXT:    s_waitcnt vmcnt(41)
+; CHECK-NEXT:    buffer_store_dword v24, v0, s[0:3], 0 offen offset:164
+; CHECK-NEXT:    s_waitcnt vmcnt(40)
+; CHECK-NEXT:    buffer_store_dword v25, v0, s[0:3], 0 offen offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(39)
+; CHECK-NEXT:    buffer_store_dword v26, v0, s[0:3], 0 offen offset:156
+; CHECK-NEXT:    s_waitcnt vmcnt(38)
+; CHECK-NEXT:    buffer_store_dword v27, v0, s[0:3], 0 offen offset:152
+; CHECK-NEXT:    s_waitcnt vmcnt(37)
+; CHECK-NEXT:    buffer_store_dword v28, v0, s[0:3], 0 offen offset:148
+; CHECK-NEXT:    s_waitcnt vmcnt(36)
+; CHECK-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(35)
+; CHECK-NEXT:    buffer_store_dword v30, v0, s[0:3], 0 offen offset:140
+; CHECK-NEXT:    s_waitcnt vmcnt(34)
+; CHECK-NEXT:    buffer_store_dword v31, v0, s[0:3], 0 offen offset:136
+; CHECK-NEXT:    s_waitcnt vmcnt(33)
+; CHECK-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:132
+; CHECK-NEXT:    s_waitcnt vmcnt(32)
+; CHECK-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen offset:128
+; CHECK-NEXT:    s_waitcnt vmcnt(31)
+; CHECK-NEXT:    buffer_store_dword v34, v0, s[0:3], 0 offen offset:124
+; CHECK-NEXT:    s_waitcnt vmcnt(30)
+; CHECK-NEXT:    buffer_store_dword v35, v0, s[0:3], 0 offen offset:120
+; CHECK-NEXT:    s_waitcnt vmcnt(29)
+; CHECK-NEXT:    buffer_store_dword v36, v0, s[0:3], 0 offen offset:116
+; CHECK-NEXT:    s_waitcnt vmcnt(28)
+; CHECK-NEXT:    buffer_store_dword v37, v0, s[0:3], 0 offen offset:112
+; CHECK-NEXT:    s_waitcnt vmcnt(27)
+; CHECK-NEXT:    buffer_store_dword v38, v0, s[0:3], 0 offen offset:108
+; CHECK-NEXT:    s_waitcnt vmcnt(26)
+; CHECK-NEXT:    buffer_store_dword v39, v0, s[0:3], 0 offen offset:104
+; CHECK-NEXT:    s_waitcnt vmcnt(25)
+; CHECK-NEXT:    buffer_store_dword v48, v0, s[0:3], 0 offen offset:100
+; CHECK-NEXT:    s_waitcnt vmcnt(24)
+; CHECK-NEXT:    buffer_store_dword v49, v0, s[0:3], 0 offen offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(23)
+; CHECK-NEXT:    buffer_store_dword v50, v0, s[0:3], 0 offen offset:92
+; CHECK-NEXT:    s_waitcnt vmcnt(22)
+; CHECK-NEXT:    buffer_store_dword v51, v0, s[0:3], 0 offen offset:88
+; CHECK-NEXT:    s_waitcnt vmcnt(21)
+; CHECK-NEXT:    buffer_store_dword v52, v0, s[0:3], 0 offen offset:84
+; CHECK-NEXT:    s_waitcnt vmcnt(20)
+; CHECK-NEXT:    buffer_store_dword v53, v0, s[0:3], 0 offen offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(19)
+; CHECK-NEXT:    buffer_store_dword v54, v0, s[0:3], 0 offen offset:76
+; CHECK-NEXT:    s_waitcnt vmcnt(18)
+; CHECK-NEXT:    buffer_store_dword v55, v0, s[0:3], 0 offen offset:72
+; CHECK-NEXT:    s_waitcnt vmcnt(17)
+; CHECK-NEXT:    buffer_store_dword v64, v0, s[0:3], 0 offen offset:68
+; CHECK-NEXT:    s_waitcnt vmcnt(16)
+; CHECK-NEXT:    buffer_store_dword v65, v0, s[0:3], 0 offen offset:64
+; CHECK-NEXT:    s_waitcnt vmcnt(15)
+; CHECK-NEXT:    buffer_store_dword v66, v0, s[0:3], 0 offen offset:60
+; CHECK-NEXT:    s_waitcnt vmcnt(14)
+; CHECK-NEXT:    buffer_store_dword v67, v0, s[0:3], 0 offen offset:56
+; CHECK-NEXT:    s_waitcnt vmcnt(13)
+; CHECK-NEXT:    buffer_store_dword v68, v0, s[0:3], 0 offen offset:52
+; CHECK-NEXT:    s_waitcnt vmcnt(12)
+; CHECK-NEXT:    buffer_store_dword v69, v0, s[0:3], 0 offen offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(11)
+; CHECK-NEXT:    buffer_store_dword v70, v0, s[0:3], 0 offen offset:44
+; CHECK-NEXT:    s_waitcnt vmcnt(10)
+; CHECK-NEXT:    buffer_store_dword v71, v0, s[0:3], 0 offen offset:40
+; CHECK-NEXT:    s_waitcnt vmcnt(9)
+; CHECK-NEXT:    buffer_store_dword v80, v0, s[0:3], 0 offen offset:36
+; CHECK-NEXT:    s_waitcnt vmcnt(8)
+; CHECK-NEXT:    buffer_store_dword v81, v0, s[0:3], 0 offen offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(7)
+; CHECK-NEXT:    buffer_store_dword v82, v0, s[0:3], 0 offen offset:28
+; CHECK-NEXT:    s_waitcnt vmcnt(6)
+; CHECK-NEXT:    buffer_store_dword v83, v0, s[0:3], 0 offen offset:24
+; CHECK-NEXT:    s_waitcnt vmcnt(5)
+; CHECK-NEXT:    buffer_store_dword v84, v0, s[0:3], 0 offen offset:20
+; CHECK-NEXT:    s_waitcnt vmcnt(4)
+; CHECK-NEXT:    buffer_store_dword v85, v0, s[0:3], 0 offen offset:16
+; CHECK-NEXT:    s_waitcnt vmcnt(3)
+; CHECK-NEXT:    buffer_store_dword v86, v0, s[0:3], 0 offen offset:12
+; CHECK-NEXT:    s_waitcnt vmcnt(2)
+; CHECK-NEXT:    buffer_store_dword v87, v0, s[0:3], 0 offen offset:8
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    buffer_store_dword v96, v0, s[0:3], 0 offen offset:4
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    buffer_store_dword v97, v0, s[0:3], 0 offen
+; CHECK-NEXT:    v_add_nc_u32_e32 v0, 0x100, v0
+; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; CHECK-NEXT:    s_cbranch_vccnz .LBB3_1
+; CHECK-NEXT:  ; %bb.2: ; %memcpy-split
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+;
+; ALIGNED-LABEL: memcpy_p5_p5_sz2048:
+; ALIGNED:       ; %bb.0: ; %entry
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v105, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v107, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v108, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v109, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v111, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v120, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v121, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v122, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v127, off, s[0:3], s32 ; 4-byte Folded Spill
+; ALIGNED-NEXT:  .LBB3_1: ; %load-store-loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    s_clause 0x34
+; ALIGNED-NEXT:    buffer_load_ubyte v116, v1, s[0:3], 0 offen offset:255
+; ALIGNED-NEXT:    buffer_load_ubyte v117, v1, s[0:3], 0 offen offset:254
+; ALIGNED-NEXT:    buffer_load_ubyte v118, v1, s[0:3], 0 offen offset:253
+; ALIGNED-NEXT:    buffer_load_ubyte v119, v1, s[0:3], 0 offen offset:252
+; ALIGNED-NEXT:    buffer_load_ubyte v40, v1, s[0:3], 0 offen offset:251
+; ALIGNED-NEXT:    buffer_load_ubyte v41, v1, s[0:3], 0 offen offset:250
+; ALIGNED-NEXT:    buffer_load_ubyte v42, v1, s[0:3], 0 offen offset:249
+; ALIGNED-NEXT:    buffer_load_ubyte v43, v1, s[0:3], 0 offen offset:248
+; ALIGNED-NEXT:    buffer_load_ubyte v44, v1, s[0:3], 0 offen offset:247
+; ALIGNED-NEXT:    buffer_load_ubyte v45, v1, s[0:3], 0 offen offset:246
+; ALIGNED-NEXT:    buffer_load_ubyte v46, v1, s[0:3], 0 offen offset:245
+; ALIGNED-NEXT:    buffer_load_ubyte v47, v1, s[0:3], 0 offen offset:244
+; ALIGNED-NEXT:    buffer_load_ubyte v56, v1, s[0:3], 0 offen offset:243
+; ALIGNED-NEXT:    buffer_load_ubyte v57, v1, s[0:3], 0 offen offset:242
+; ALIGNED-NEXT:    buffer_load_ubyte v58, v1, s[0:3], 0 offen offset:241
+; ALIGNED-NEXT:    buffer_load_ubyte v59, v1, s[0:3], 0 offen offset:240
+; ALIGNED-NEXT:    buffer_load_ubyte v60, v1, s[0:3], 0 offen offset:239
+; ALIGNED-NEXT:    buffer_load_ubyte v61, v1, s[0:3], 0 offen offset:238
+; ALIGNED-NEXT:    buffer_load_ubyte v62, v1, s[0:3], 0 offen offset:237
+; ALIGNED-NEXT:    buffer_load_ubyte v63, v1, s[0:3], 0 offen offset:236
+; ALIGNED-NEXT:    buffer_load_ubyte v72, v1, s[0:3], 0 offen offset:235
+; ALIGNED-NEXT:    buffer_load_ubyte v73, v1, s[0:3], 0 offen offset:234
+; ALIGNED-NEXT:    buffer_load_ubyte v74, v1, s[0:3], 0 offen offset:233
+; ALIGNED-NEXT:    buffer_load_ubyte v75, v1, s[0:3], 0 offen offset:232
+; ALIGNED-NEXT:    buffer_load_ubyte v76, v1, s[0:3], 0 offen offset:231
+; ALIGNED-NEXT:    buffer_load_ubyte v77, v1, s[0:3], 0 offen offset:230
+; ALIGNED-NEXT:    buffer_load_ubyte v78, v1, s[0:3], 0 offen offset:229
+; ALIGNED-NEXT:    buffer_load_ubyte v79, v1, s[0:3], 0 offen offset:228
+; ALIGNED-NEXT:    buffer_load_ubyte v88, v1, s[0:3], 0 offen offset:227
+; ALIGNED-NEXT:    buffer_load_ubyte v89, v1, s[0:3], 0 offen offset:226
+; ALIGNED-NEXT:    buffer_load_ubyte v90, v1, s[0:3], 0 offen offset:225
+; ALIGNED-NEXT:    buffer_load_ubyte v91, v1, s[0:3], 0 offen offset:224
+; ALIGNED-NEXT:    buffer_load_ubyte v92, v1, s[0:3], 0 offen offset:223
+; ALIGNED-NEXT:    buffer_load_ubyte v93, v1, s[0:3], 0 offen offset:222
+; ALIGNED-NEXT:    buffer_load_ubyte v94, v1, s[0:3], 0 offen offset:221
+; ALIGNED-NEXT:    buffer_load_ubyte v95, v1, s[0:3], 0 offen offset:220
+; ALIGNED-NEXT:    buffer_load_ubyte v104, v1, s[0:3], 0 offen offset:219
+; ALIGNED-NEXT:    buffer_load_ubyte v105, v1, s[0:3], 0 offen offset:218
+; ALIGNED-NEXT:    buffer_load_ubyte v106, v1, s[0:3], 0 offen offset:217
+; ALIGNED-NEXT:    buffer_load_ubyte v107, v1, s[0:3], 0 offen offset:216
+; ALIGNED-NEXT:    buffer_load_ubyte v108, v1, s[0:3], 0 offen offset:215
+; ALIGNED-NEXT:    buffer_load_ubyte v109, v1, s[0:3], 0 offen offset:214
+; ALIGNED-NEXT:    buffer_load_ubyte v110, v1, s[0:3], 0 offen offset:213
+; ALIGNED-NEXT:    buffer_load_ubyte v111, v1, s[0:3], 0 offen offset:212
+; ALIGNED-NEXT:    buffer_load_ubyte v120, v1, s[0:3], 0 offen offset:211
+; ALIGNED-NEXT:    buffer_load_ubyte v121, v1, s[0:3], 0 offen offset:210
+; ALIGNED-NEXT:    buffer_load_ubyte v122, v1, s[0:3], 0 offen offset:209
+; ALIGNED-NEXT:    buffer_load_ubyte v123, v1, s[0:3], 0 offen offset:208
+; ALIGNED-NEXT:    buffer_load_ubyte v124, v1, s[0:3], 0 offen offset:207
+; ALIGNED-NEXT:    buffer_load_ubyte v125, v1, s[0:3], 0 offen offset:206
+; ALIGNED-NEXT:    buffer_load_ubyte v126, v1, s[0:3], 0 offen offset:205
+; ALIGNED-NEXT:    buffer_load_ubyte v127, v1, s[0:3], 0 offen offset:204
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:203
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0x100
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, 0
+; ALIGNED-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; ALIGNED-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x3e
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v1, s[0:3], 0 offen offset:202
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v1, s[0:3], 0 offen offset:201
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v1, s[0:3], 0 offen offset:200
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v1, s[0:3], 0 offen offset:199
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v1, s[0:3], 0 offen offset:198
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v1, s[0:3], 0 offen offset:197
+; ALIGNED-NEXT:    buffer_load_ubyte v9, v1, s[0:3], 0 offen offset:196
+; ALIGNED-NEXT:    buffer_load_ubyte v10, v1, s[0:3], 0 offen offset:195
+; ALIGNED-NEXT:    buffer_load_ubyte v11, v1, s[0:3], 0 offen offset:194
+; ALIGNED-NEXT:    buffer_load_ubyte v12, v1, s[0:3], 0 offen offset:193
+; ALIGNED-NEXT:    buffer_load_ubyte v13, v1, s[0:3], 0 offen offset:192
+; ALIGNED-NEXT:    buffer_load_ubyte v14, v1, s[0:3], 0 offen offset:191
+; ALIGNED-NEXT:    buffer_load_ubyte v15, v1, s[0:3], 0 offen offset:190
+; ALIGNED-NEXT:    buffer_load_ubyte v16, v1, s[0:3], 0 offen offset:189
+; ALIGNED-NEXT:    buffer_load_ubyte v17, v1, s[0:3], 0 offen offset:188
+; ALIGNED-NEXT:    buffer_load_ubyte v18, v1, s[0:3], 0 offen offset:187
+; ALIGNED-NEXT:    buffer_load_ubyte v19, v1, s[0:3], 0 offen offset:186
+; ALIGNED-NEXT:    buffer_load_ubyte v20, v1, s[0:3], 0 offen offset:185
+; ALIGNED-NEXT:    buffer_load_ubyte v21, v1, s[0:3], 0 offen offset:184
+; ALIGNED-NEXT:    buffer_load_ubyte v22, v1, s[0:3], 0 offen offset:183
+; ALIGNED-NEXT:    buffer_load_ubyte v23, v1, s[0:3], 0 offen offset:182
+; ALIGNED-NEXT:    buffer_load_ubyte v24, v1, s[0:3], 0 offen offset:181
+; ALIGNED-NEXT:    buffer_load_ubyte v25, v1, s[0:3], 0 offen offset:180
+; ALIGNED-NEXT:    buffer_load_ubyte v26, v1, s[0:3], 0 offen offset:179
+; ALIGNED-NEXT:    buffer_load_ubyte v27, v1, s[0:3], 0 offen offset:178
+; ALIGNED-NEXT:    buffer_load_ubyte v28, v1, s[0:3], 0 offen offset:177
+; ALIGNED-NEXT:    buffer_load_ubyte v29, v1, s[0:3], 0 offen offset:176
+; ALIGNED-NEXT:    buffer_load_ubyte v30, v1, s[0:3], 0 offen offset:175
+; ALIGNED-NEXT:    buffer_load_ubyte v31, v1, s[0:3], 0 offen offset:174
+; ALIGNED-NEXT:    buffer_load_ubyte v32, v1, s[0:3], 0 offen offset:173
+; ALIGNED-NEXT:    buffer_load_ubyte v33, v1, s[0:3], 0 offen offset:172
+; ALIGNED-NEXT:    buffer_load_ubyte v34, v1, s[0:3], 0 offen offset:171
+; ALIGNED-NEXT:    buffer_load_ubyte v35, v1, s[0:3], 0 offen offset:170
+; ALIGNED-NEXT:    buffer_load_ubyte v36, v1, s[0:3], 0 offen offset:169
+; ALIGNED-NEXT:    buffer_load_ubyte v37, v1, s[0:3], 0 offen offset:168
+; ALIGNED-NEXT:    buffer_load_ubyte v38, v1, s[0:3], 0 offen offset:167
+; ALIGNED-NEXT:    buffer_load_ubyte v39, v1, s[0:3], 0 offen offset:166
+; ALIGNED-NEXT:    buffer_load_ubyte v48, v1, s[0:3], 0 offen offset:165
+; ALIGNED-NEXT:    buffer_load_ubyte v49, v1, s[0:3], 0 offen offset:164
+; ALIGNED-NEXT:    buffer_load_ubyte v50, v1, s[0:3], 0 offen offset:163
+; ALIGNED-NEXT:    buffer_load_ubyte v51, v1, s[0:3], 0 offen offset:162
+; ALIGNED-NEXT:    buffer_load_ubyte v52, v1, s[0:3], 0 offen offset:161
+; ALIGNED-NEXT:    buffer_load_ubyte v53, v1, s[0:3], 0 offen offset:160
+; ALIGNED-NEXT:    buffer_load_ubyte v54, v1, s[0:3], 0 offen offset:159
+; ALIGNED-NEXT:    buffer_load_ubyte v55, v1, s[0:3], 0 offen offset:158
+; ALIGNED-NEXT:    buffer_load_ubyte v64, v1, s[0:3], 0 offen offset:157
+; ALIGNED-NEXT:    buffer_load_ubyte v65, v1, s[0:3], 0 offen offset:156
+; ALIGNED-NEXT:    buffer_load_ubyte v66, v1, s[0:3], 0 offen offset:155
+; ALIGNED-NEXT:    buffer_load_ubyte v67, v1, s[0:3], 0 offen offset:154
+; ALIGNED-NEXT:    buffer_load_ubyte v68, v1, s[0:3], 0 offen offset:153
+; ALIGNED-NEXT:    buffer_load_ubyte v69, v1, s[0:3], 0 offen offset:152
+; ALIGNED-NEXT:    buffer_load_ubyte v70, v1, s[0:3], 0 offen offset:151
+; ALIGNED-NEXT:    buffer_load_ubyte v71, v1, s[0:3], 0 offen offset:150
+; ALIGNED-NEXT:    buffer_load_ubyte v80, v1, s[0:3], 0 offen offset:149
+; ALIGNED-NEXT:    buffer_load_ubyte v81, v1, s[0:3], 0 offen offset:148
+; ALIGNED-NEXT:    buffer_load_ubyte v82, v1, s[0:3], 0 offen offset:147
+; ALIGNED-NEXT:    buffer_load_ubyte v83, v1, s[0:3], 0 offen offset:146
+; ALIGNED-NEXT:    buffer_load_ubyte v84, v1, s[0:3], 0 offen offset:145
+; ALIGNED-NEXT:    buffer_load_ubyte v85, v1, s[0:3], 0 offen offset:144
+; ALIGNED-NEXT:    buffer_load_ubyte v86, v1, s[0:3], 0 offen offset:143
+; ALIGNED-NEXT:    buffer_load_ubyte v87, v1, s[0:3], 0 offen offset:142
+; ALIGNED-NEXT:    buffer_load_ubyte v96, v1, s[0:3], 0 offen offset:141
+; ALIGNED-NEXT:    buffer_load_ubyte v97, v1, s[0:3], 0 offen offset:140
+; ALIGNED-NEXT:    s_clause 0xa
+; ALIGNED-NEXT:    buffer_load_ubyte v98, v1, s[0:3], 0 offen offset:139
+; ALIGNED-NEXT:    buffer_load_ubyte v99, v1, s[0:3], 0 offen offset:138
+; ALIGNED-NEXT:    buffer_load_ubyte v100, v1, s[0:3], 0 offen offset:137
+; ALIGNED-NEXT:    buffer_load_ubyte v101, v1, s[0:3], 0 offen offset:136
+; ALIGNED-NEXT:    buffer_load_ubyte v102, v1, s[0:3], 0 offen offset:135
+; ALIGNED-NEXT:    buffer_load_ubyte v103, v1, s[0:3], 0 offen offset:134
+; ALIGNED-NEXT:    buffer_load_ubyte v112, v1, s[0:3], 0 offen offset:133
+; ALIGNED-NEXT:    buffer_load_ubyte v113, v1, s[0:3], 0 offen offset:132
+; ALIGNED-NEXT:    buffer_load_ubyte v114, v1, s[0:3], 0 offen offset:131
+; ALIGNED-NEXT:    buffer_load_ubyte v115, v1, s[0:3], 0 offen offset:130
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:129
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:708 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:128
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:704 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:127
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:700 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:126
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:696 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:125
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:692 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:124
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:688 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:123
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:684 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:122
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:680 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:121
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:676 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:120
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:672 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:119
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:668 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:118
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:664 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:117
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:116
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:656 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:115
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:652 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:114
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:648 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:113
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:644 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:640 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:111
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:636 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:110
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:632 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:109
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:628 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:108
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:624 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:107
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:620 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:106
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:616 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:105
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:612 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:104
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:608 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:103
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:604 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:102
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:600 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:101
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:596 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:100
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:592 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:99
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:588 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:98
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:584 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:97
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:580 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:96
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:576 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:95
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:94
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:93
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:92
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:91
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:90
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:89
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:88
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:87
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:86
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:85
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:84
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:83
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:524 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:82
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:81
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:80
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:512 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:79
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:78
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:77
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:76
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:75
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:74
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:488 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:73
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:72
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:71
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:70
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:69
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:468 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:67
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:66
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:65
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:64
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:63
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:62
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:61
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:60
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:59
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:58
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:57
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:56
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:55
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:54
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:53
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:52
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:51
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:50
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:49
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:48
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:47
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:46
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:45
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:44
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:43
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:42
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:41
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:40
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:39
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:38
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:37
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:36
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:35
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:34
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:33
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:32
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:31
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:30
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:29
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:27
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:26
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:25
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:24
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:23
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:22
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:21
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:20
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:19
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:18
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:17
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:16
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:15
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:14
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:13
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:12
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:11
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:10
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:9
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:8
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:7
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:6
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:5
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:4
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:3
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:2
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:1
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen
+; ALIGNED-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_byte v116, v0, s[0:3], 0 offen offset:255
+; ALIGNED-NEXT:    buffer_store_byte v117, v0, s[0:3], 0 offen offset:254
+; ALIGNED-NEXT:    buffer_store_byte v118, v0, s[0:3], 0 offen offset:253
+; ALIGNED-NEXT:    buffer_store_byte v119, v0, s[0:3], 0 offen offset:252
+; ALIGNED-NEXT:    buffer_store_byte v40, v0, s[0:3], 0 offen offset:251
+; ALIGNED-NEXT:    buffer_store_byte v41, v0, s[0:3], 0 offen offset:250
+; ALIGNED-NEXT:    buffer_store_byte v42, v0, s[0:3], 0 offen offset:249
+; ALIGNED-NEXT:    buffer_store_byte v43, v0, s[0:3], 0 offen offset:248
+; ALIGNED-NEXT:    buffer_store_byte v44, v0, s[0:3], 0 offen offset:247
+; ALIGNED-NEXT:    buffer_store_byte v45, v0, s[0:3], 0 offen offset:246
+; ALIGNED-NEXT:    buffer_store_byte v46, v0, s[0:3], 0 offen offset:245
+; ALIGNED-NEXT:    buffer_store_byte v47, v0, s[0:3], 0 offen offset:244
+; ALIGNED-NEXT:    buffer_store_byte v56, v0, s[0:3], 0 offen offset:243
+; ALIGNED-NEXT:    buffer_store_byte v57, v0, s[0:3], 0 offen offset:242
+; ALIGNED-NEXT:    buffer_store_byte v58, v0, s[0:3], 0 offen offset:241
+; ALIGNED-NEXT:    buffer_store_byte v59, v0, s[0:3], 0 offen offset:240
+; ALIGNED-NEXT:    buffer_store_byte v60, v0, s[0:3], 0 offen offset:239
+; ALIGNED-NEXT:    buffer_store_byte v61, v0, s[0:3], 0 offen offset:238
+; ALIGNED-NEXT:    buffer_store_byte v62, v0, s[0:3], 0 offen offset:237
+; ALIGNED-NEXT:    buffer_store_byte v63, v0, s[0:3], 0 offen offset:236
+; ALIGNED-NEXT:    buffer_store_byte v72, v0, s[0:3], 0 offen offset:235
+; ALIGNED-NEXT:    buffer_store_byte v73, v0, s[0:3], 0 offen offset:234
+; ALIGNED-NEXT:    buffer_store_byte v74, v0, s[0:3], 0 offen offset:233
+; ALIGNED-NEXT:    buffer_store_byte v75, v0, s[0:3], 0 offen offset:232
+; ALIGNED-NEXT:    buffer_store_byte v76, v0, s[0:3], 0 offen offset:231
+; ALIGNED-NEXT:    buffer_store_byte v77, v0, s[0:3], 0 offen offset:230
+; ALIGNED-NEXT:    buffer_store_byte v78, v0, s[0:3], 0 offen offset:229
+; ALIGNED-NEXT:    buffer_store_byte v79, v0, s[0:3], 0 offen offset:228
+; ALIGNED-NEXT:    buffer_store_byte v88, v0, s[0:3], 0 offen offset:227
+; ALIGNED-NEXT:    buffer_store_byte v89, v0, s[0:3], 0 offen offset:226
+; ALIGNED-NEXT:    buffer_store_byte v90, v0, s[0:3], 0 offen offset:225
+; ALIGNED-NEXT:    buffer_store_byte v91, v0, s[0:3], 0 offen offset:224
+; ALIGNED-NEXT:    buffer_store_byte v92, v0, s[0:3], 0 offen offset:223
+; ALIGNED-NEXT:    buffer_store_byte v93, v0, s[0:3], 0 offen offset:222
+; ALIGNED-NEXT:    buffer_store_byte v94, v0, s[0:3], 0 offen offset:221
+; ALIGNED-NEXT:    buffer_store_byte v95, v0, s[0:3], 0 offen offset:220
+; ALIGNED-NEXT:    buffer_store_byte v104, v0, s[0:3], 0 offen offset:219
+; ALIGNED-NEXT:    buffer_store_byte v105, v0, s[0:3], 0 offen offset:218
+; ALIGNED-NEXT:    buffer_store_byte v106, v0, s[0:3], 0 offen offset:217
+; ALIGNED-NEXT:    buffer_store_byte v107, v0, s[0:3], 0 offen offset:216
+; ALIGNED-NEXT:    buffer_store_byte v108, v0, s[0:3], 0 offen offset:215
+; ALIGNED-NEXT:    buffer_store_byte v109, v0, s[0:3], 0 offen offset:214
+; ALIGNED-NEXT:    buffer_store_byte v110, v0, s[0:3], 0 offen offset:213
+; ALIGNED-NEXT:    buffer_store_byte v111, v0, s[0:3], 0 offen offset:212
+; ALIGNED-NEXT:    buffer_store_byte v120, v0, s[0:3], 0 offen offset:211
+; ALIGNED-NEXT:    buffer_store_byte v121, v0, s[0:3], 0 offen offset:210
+; ALIGNED-NEXT:    buffer_store_byte v122, v0, s[0:3], 0 offen offset:209
+; ALIGNED-NEXT:    buffer_store_byte v123, v0, s[0:3], 0 offen offset:208
+; ALIGNED-NEXT:    buffer_store_byte v124, v0, s[0:3], 0 offen offset:207
+; ALIGNED-NEXT:    buffer_store_byte v125, v0, s[0:3], 0 offen offset:206
+; ALIGNED-NEXT:    buffer_store_byte v126, v0, s[0:3], 0 offen offset:205
+; ALIGNED-NEXT:    buffer_store_byte v127, v0, s[0:3], 0 offen offset:204
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:203
+; ALIGNED-NEXT:    buffer_store_byte v3, v0, s[0:3], 0 offen offset:202
+; ALIGNED-NEXT:    buffer_store_byte v4, v0, s[0:3], 0 offen offset:201
+; ALIGNED-NEXT:    buffer_store_byte v5, v0, s[0:3], 0 offen offset:200
+; ALIGNED-NEXT:    buffer_store_byte v6, v0, s[0:3], 0 offen offset:199
+; ALIGNED-NEXT:    buffer_store_byte v7, v0, s[0:3], 0 offen offset:198
+; ALIGNED-NEXT:    buffer_store_byte v8, v0, s[0:3], 0 offen offset:197
+; ALIGNED-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:196
+; ALIGNED-NEXT:    buffer_store_byte v10, v0, s[0:3], 0 offen offset:195
+; ALIGNED-NEXT:    buffer_store_byte v11, v0, s[0:3], 0 offen offset:194
+; ALIGNED-NEXT:    buffer_store_byte v12, v0, s[0:3], 0 offen offset:193
+; ALIGNED-NEXT:    buffer_store_byte v13, v0, s[0:3], 0 offen offset:192
+; ALIGNED-NEXT:    buffer_store_byte v14, v0, s[0:3], 0 offen offset:191
+; ALIGNED-NEXT:    buffer_store_byte v15, v0, s[0:3], 0 offen offset:190
+; ALIGNED-NEXT:    buffer_store_byte v16, v0, s[0:3], 0 offen offset:189
+; ALIGNED-NEXT:    buffer_store_byte v17, v0, s[0:3], 0 offen offset:188
+; ALIGNED-NEXT:    buffer_store_byte v18, v0, s[0:3], 0 offen offset:187
+; ALIGNED-NEXT:    buffer_store_byte v19, v0, s[0:3], 0 offen offset:186
+; ALIGNED-NEXT:    buffer_store_byte v20, v0, s[0:3], 0 offen offset:185
+; ALIGNED-NEXT:    buffer_store_byte v21, v0, s[0:3], 0 offen offset:184
+; ALIGNED-NEXT:    buffer_store_byte v22, v0, s[0:3], 0 offen offset:183
+; ALIGNED-NEXT:    buffer_store_byte v23, v0, s[0:3], 0 offen offset:182
+; ALIGNED-NEXT:    buffer_store_byte v24, v0, s[0:3], 0 offen offset:181
+; ALIGNED-NEXT:    buffer_store_byte v25, v0, s[0:3], 0 offen offset:180
+; ALIGNED-NEXT:    buffer_store_byte v26, v0, s[0:3], 0 offen offset:179
+; ALIGNED-NEXT:    buffer_store_byte v27, v0, s[0:3], 0 offen offset:178
+; ALIGNED-NEXT:    buffer_store_byte v28, v0, s[0:3], 0 offen offset:177
+; ALIGNED-NEXT:    buffer_store_byte v29, v0, s[0:3], 0 offen offset:176
+; ALIGNED-NEXT:    buffer_store_byte v30, v0, s[0:3], 0 offen offset:175
+; ALIGNED-NEXT:    buffer_store_byte v31, v0, s[0:3], 0 offen offset:174
+; ALIGNED-NEXT:    buffer_store_byte v32, v0, s[0:3], 0 offen offset:173
+; ALIGNED-NEXT:    buffer_store_byte v33, v0, s[0:3], 0 offen offset:172
+; ALIGNED-NEXT:    buffer_store_byte v34, v0, s[0:3], 0 offen offset:171
+; ALIGNED-NEXT:    buffer_store_byte v35, v0, s[0:3], 0 offen offset:170
+; ALIGNED-NEXT:    buffer_store_byte v36, v0, s[0:3], 0 offen offset:169
+; ALIGNED-NEXT:    buffer_store_byte v37, v0, s[0:3], 0 offen offset:168
+; ALIGNED-NEXT:    buffer_store_byte v38, v0, s[0:3], 0 offen offset:167
+; ALIGNED-NEXT:    buffer_store_byte v39, v0, s[0:3], 0 offen offset:166
+; ALIGNED-NEXT:    buffer_store_byte v48, v0, s[0:3], 0 offen offset:165
+; ALIGNED-NEXT:    buffer_store_byte v49, v0, s[0:3], 0 offen offset:164
+; ALIGNED-NEXT:    buffer_store_byte v50, v0, s[0:3], 0 offen offset:163
+; ALIGNED-NEXT:    buffer_store_byte v51, v0, s[0:3], 0 offen offset:162
+; ALIGNED-NEXT:    buffer_store_byte v52, v0, s[0:3], 0 offen offset:161
+; ALIGNED-NEXT:    buffer_store_byte v53, v0, s[0:3], 0 offen offset:160
+; ALIGNED-NEXT:    buffer_store_byte v54, v0, s[0:3], 0 offen offset:159
+; ALIGNED-NEXT:    buffer_store_byte v55, v0, s[0:3], 0 offen offset:158
+; ALIGNED-NEXT:    buffer_store_byte v64, v0, s[0:3], 0 offen offset:157
+; ALIGNED-NEXT:    buffer_store_byte v65, v0, s[0:3], 0 offen offset:156
+; ALIGNED-NEXT:    buffer_store_byte v66, v0, s[0:3], 0 offen offset:155
+; ALIGNED-NEXT:    buffer_store_byte v67, v0, s[0:3], 0 offen offset:154
+; ALIGNED-NEXT:    buffer_store_byte v68, v0, s[0:3], 0 offen offset:153
+; ALIGNED-NEXT:    buffer_store_byte v69, v0, s[0:3], 0 offen offset:152
+; ALIGNED-NEXT:    buffer_store_byte v70, v0, s[0:3], 0 offen offset:151
+; ALIGNED-NEXT:    buffer_store_byte v71, v0, s[0:3], 0 offen offset:150
+; ALIGNED-NEXT:    buffer_store_byte v80, v0, s[0:3], 0 offen offset:149
+; ALIGNED-NEXT:    buffer_store_byte v81, v0, s[0:3], 0 offen offset:148
+; ALIGNED-NEXT:    buffer_store_byte v82, v0, s[0:3], 0 offen offset:147
+; ALIGNED-NEXT:    buffer_store_byte v83, v0, s[0:3], 0 offen offset:146
+; ALIGNED-NEXT:    buffer_store_byte v84, v0, s[0:3], 0 offen offset:145
+; ALIGNED-NEXT:    buffer_store_byte v85, v0, s[0:3], 0 offen offset:144
+; ALIGNED-NEXT:    buffer_store_byte v86, v0, s[0:3], 0 offen offset:143
+; ALIGNED-NEXT:    buffer_store_byte v87, v0, s[0:3], 0 offen offset:142
+; ALIGNED-NEXT:    buffer_store_byte v96, v0, s[0:3], 0 offen offset:141
+; ALIGNED-NEXT:    buffer_store_byte v97, v0, s[0:3], 0 offen offset:140
+; ALIGNED-NEXT:    buffer_store_byte v98, v0, s[0:3], 0 offen offset:139
+; ALIGNED-NEXT:    buffer_store_byte v99, v0, s[0:3], 0 offen offset:138
+; ALIGNED-NEXT:    buffer_store_byte v100, v0, s[0:3], 0 offen offset:137
+; ALIGNED-NEXT:    buffer_store_byte v101, v0, s[0:3], 0 offen offset:136
+; ALIGNED-NEXT:    buffer_store_byte v102, v0, s[0:3], 0 offen offset:135
+; ALIGNED-NEXT:    buffer_store_byte v103, v0, s[0:3], 0 offen offset:134
+; ALIGNED-NEXT:    buffer_store_byte v112, v0, s[0:3], 0 offen offset:133
+; ALIGNED-NEXT:    buffer_store_byte v113, v0, s[0:3], 0 offen offset:132
+; ALIGNED-NEXT:    buffer_store_byte v114, v0, s[0:3], 0 offen offset:131
+; ALIGNED-NEXT:    buffer_store_byte v115, v0, s[0:3], 0 offen offset:130
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:708 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:129
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:704 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:128
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:700 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:127
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:696 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:126
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:692 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:125
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:688 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:124
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:684 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:123
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:680 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:122
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:676 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:121
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:672 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:120
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:668 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:119
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:664 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:118
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:117
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:656 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:116
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:652 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:115
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:648 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:114
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:644 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:113
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:640 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:112
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:636 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:111
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:632 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:110
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:628 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:109
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:624 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:108
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:620 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:107
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:616 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:106
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:612 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:105
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:608 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:104
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:604 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:103
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:600 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:102
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:596 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:101
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:592 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:100
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:588 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:99
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:584 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:98
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:580 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:97
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:576 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:96
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:95
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:94
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:93
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:92
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:91
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:90
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:89
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:88
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:87
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:86
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:85
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:84
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:524 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:83
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:82
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:81
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:512 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:80
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:79
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:78
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:77
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:76
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:75
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:488 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:74
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:73
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:480 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:72
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:71
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:70
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:468 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:69
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:68
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:67
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:66
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:65
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:64
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:63
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:62
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:61
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:60
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:59
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:58
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:57
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:56
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:55
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:54
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:53
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:52
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:51
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:50
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:49
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:48
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:47
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:46
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:45
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:44
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:43
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:42
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:41
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:40
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:39
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:38
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:37
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:36
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:35
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:34
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:33
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:32
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:31
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:30
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:29
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:28
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:27
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:26
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:25
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:24
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:23
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:22
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:21
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:20
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:19
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:18
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:17
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:16
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:15
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:14
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:13
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:12
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:11
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:10
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:9
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:8
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:7
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:6
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:5
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:4
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:3
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:2
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:1
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen
+; ALIGNED-NEXT:    v_add_nc_u32_e32 v0, 0x100, v0
+; ALIGNED-NEXT:    s_cbranch_vccnz .LBB3_1
+; ALIGNED-NEXT:  ; %bb.2: ; %memcpy-split
+; ALIGNED-NEXT:    s_clause 0x2f
+; ALIGNED-NEXT:    buffer_load_dword v127, off, s[0:3], s32
+; ALIGNED-NEXT:    buffer_load_dword v126, off, s[0:3], s32 offset:4
+; ALIGNED-NEXT:    buffer_load_dword v125, off, s[0:3], s32 offset:8
+; ALIGNED-NEXT:    buffer_load_dword v124, off, s[0:3], s32 offset:12
+; ALIGNED-NEXT:    buffer_load_dword v123, off, s[0:3], s32 offset:16
+; ALIGNED-NEXT:    buffer_load_dword v122, off, s[0:3], s32 offset:20
+; ALIGNED-NEXT:    buffer_load_dword v121, off, s[0:3], s32 offset:24
+; ALIGNED-NEXT:    buffer_load_dword v120, off, s[0:3], s32 offset:28
+; ALIGNED-NEXT:    buffer_load_dword v111, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    buffer_load_dword v110, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_load_dword v109, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_load_dword v108, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    buffer_load_dword v107, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    buffer_load_dword v106, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_load_dword v105, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_load_dword v104, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    buffer_load_dword v95, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    s_setpc_b64 s[30:31]
+;
+; UNROLL3-LABEL: memcpy_p5_p5_sz2048:
+; UNROLL3:       ; %bb.0: ; %entry
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    v_mov_b32_e32 v2, v1
+; UNROLL3-NEXT:    v_mov_b32_e32 v3, v0
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0
+; UNROLL3-NEXT:  .LBB3_1: ; %load-store-loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    s_clause 0xb
+; UNROLL3-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:44
+; UNROLL3-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:40
+; UNROLL3-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:36
+; UNROLL3-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:32
+; UNROLL3-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:28
+; UNROLL3-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
+; UNROLL3-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:20
+; UNROLL3-NEXT:    buffer_load_dword v11, v2, s[0:3], 0 offen offset:16
+; UNROLL3-NEXT:    buffer_load_dword v12, v2, s[0:3], 0 offen offset:12
+; UNROLL3-NEXT:    buffer_load_dword v13, v2, s[0:3], 0 offen offset:8
+; UNROLL3-NEXT:    buffer_load_dword v14, v2, s[0:3], 0 offen offset:4
+; UNROLL3-NEXT:    buffer_load_dword v15, v2, s[0:3], 0 offen
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 48
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, 0
+; UNROLL3-NEXT:    v_add_nc_u32_e32 v2, 48, v2
+; UNROLL3-NEXT:    v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5]
+; UNROLL3-NEXT:    s_waitcnt vmcnt(11)
+; UNROLL3-NEXT:    buffer_store_dword v4, v3, s[0:3], 0 offen offset:44
+; UNROLL3-NEXT:    s_waitcnt vmcnt(10)
+; UNROLL3-NEXT:    buffer_store_dword v5, v3, s[0:3], 0 offen offset:40
+; UNROLL3-NEXT:    s_waitcnt vmcnt(9)
+; UNROLL3-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen offset:36
+; UNROLL3-NEXT:    s_waitcnt vmcnt(8)
+; UNROLL3-NEXT:    buffer_store_dword v7, v3, s[0:3], 0 offen offset:32
+; UNROLL3-NEXT:    s_waitcnt vmcnt(7)
+; UNROLL3-NEXT:    buffer_store_dword v8, v3, s[0:3], 0 offen offset:28
+; UNROLL3-NEXT:    s_waitcnt vmcnt(6)
+; UNROLL3-NEXT:    buffer_store_dword v9, v3, s[0:3], 0 offen offset:24
+; UNROLL3-NEXT:    s_waitcnt vmcnt(5)
+; UNROLL3-NEXT:    buffer_store_dword v10, v3, s[0:3], 0 offen offset:20
+; UNROLL3-NEXT:    s_waitcnt vmcnt(4)
+; UNROLL3-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen offset:16
+; UNROLL3-NEXT:    s_waitcnt vmcnt(3)
+; UNROLL3-NEXT:    buffer_store_dword v12, v3, s[0:3], 0 offen offset:12
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    buffer_store_dword v13, v3, s[0:3], 0 offen offset:8
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    buffer_store_dword v14, v3, s[0:3], 0 offen offset:4
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    buffer_store_dword v15, v3, s[0:3], 0 offen
+; UNROLL3-NEXT:    v_add_nc_u32_e32 v3, 48, v3
+; UNROLL3-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; UNROLL3-NEXT:    s_cbranch_vccnz .LBB3_1
+; UNROLL3-NEXT:  ; %bb.2: ; %memcpy-split
+; UNROLL3-NEXT:    s_clause 0x3
+; UNROLL3-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:2028
+; UNROLL3-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:2024
+; UNROLL3-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:2020
+; UNROLL3-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:2016
+; UNROLL3-NEXT:    s_waitcnt vmcnt(3)
+; UNROLL3-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:2028
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:2024
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:2020
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:2016
+; UNROLL3-NEXT:    s_clause 0x3
+; UNROLL3-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:2044
+; UNROLL3-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:2040
+; UNROLL3-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:2036
+; UNROLL3-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:2032
+; UNROLL3-NEXT:    s_waitcnt vmcnt(3)
+; UNROLL3-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:2044
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:2040
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:2036
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2032
+; UNROLL3-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 2048, i1 false)
+  ret void
+}
+
+define void @memcpy_p0_p5_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
+; CHECK-LABEL: memcpy_p0_p5_sz2048:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:  .LBB4_1: ; %load-store-loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_clause 0x3e
+; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
+; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
+; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
+; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:32
+; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:36
+; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:40
+; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:44
+; CHECK-NEXT:    buffer_load_dword v11, v2, s[0:3], 0 offen offset:48
+; CHECK-NEXT:    buffer_load_dword v12, v2, s[0:3], 0 offen offset:52
+; CHECK-NEXT:    buffer_load_dword v13, v2, s[0:3], 0 offen offset:56
+; CHECK-NEXT:    buffer_load_dword v14, v2, s[0:3], 0 offen offset:60
+; CHECK-NEXT:    buffer_load_dword v18, v2, s[0:3], 0 offen offset:124
+; CHECK-NEXT:    buffer_load_dword v17, v2, s[0:3], 0 offen offset:120
+; CHECK-NEXT:    buffer_load_dword v16, v2, s[0:3], 0 offen offset:116
+; CHECK-NEXT:    buffer_load_dword v15, v2, s[0:3], 0 offen offset:112
+; CHECK-NEXT:    buffer_load_dword v22, v2, s[0:3], 0 offen offset:108
+; CHECK-NEXT:    buffer_load_dword v21, v2, s[0:3], 0 offen offset:104
+; CHECK-NEXT:    buffer_load_dword v20, v2, s[0:3], 0 offen offset:100
+; CHECK-NEXT:    buffer_load_dword v19, v2, s[0:3], 0 offen offset:96
+; CHECK-NEXT:    buffer_load_dword v26, v2, s[0:3], 0 offen offset:252
+; CHECK-NEXT:    buffer_load_dword v25, v2, s[0:3], 0 offen offset:248
+; CHECK-NEXT:    buffer_load_dword v24, v2, s[0:3], 0 offen offset:244
+; CHECK-NEXT:    buffer_load_dword v23, v2, s[0:3], 0 offen offset:240
+; CHECK-NEXT:    buffer_load_dword v30, v2, s[0:3], 0 offen offset:236
+; CHECK-NEXT:    buffer_load_dword v29, v2, s[0:3], 0 offen offset:232
+; CHECK-NEXT:    buffer_load_dword v28, v2, s[0:3], 0 offen offset:228
+; CHECK-NEXT:    buffer_load_dword v27, v2, s[0:3], 0 offen offset:224
+; CHECK-NEXT:    buffer_load_dword v34, v2, s[0:3], 0 offen offset:220
+; CHECK-NEXT:    buffer_load_dword v33, v2, s[0:3], 0 offen offset:216
+; CHECK-NEXT:    buffer_load_dword v32, v2, s[0:3], 0 offen offset:212
+; CHECK-NEXT:    buffer_load_dword v31, v2, s[0:3], 0 offen offset:208
+; CHECK-NEXT:    buffer_load_dword v38, v2, s[0:3], 0 offen offset:204
+; CHECK-NEXT:    buffer_load_dword v37, v2, s[0:3], 0 offen offset:200
+; CHECK-NEXT:    buffer_load_dword v36, v2, s[0:3], 0 offen offset:196
+; CHECK-NEXT:    buffer_load_dword v35, v2, s[0:3], 0 offen offset:192
+; CHECK-NEXT:    buffer_load_dword v51, v2, s[0:3], 0 offen offset:188
+; CHECK-NEXT:    buffer_load_dword v50, v2, s[0:3], 0 offen offset:184
+; CHECK-NEXT:    buffer_load_dword v49, v2, s[0:3], 0 offen offset:180
+; CHECK-NEXT:    buffer_load_dword v48, v2, s[0:3], 0 offen offset:176
+; CHECK-NEXT:    buffer_load_dword v55, v2, s[0:3], 0 offen offset:172
+; CHECK-NEXT:    buffer_load_dword v54, v2, s[0:3], 0 offen offset:168
+; CHECK-NEXT:    buffer_load_dword v53, v2, s[0:3], 0 offen offset:164
+; CHECK-NEXT:    buffer_load_dword v52, v2, s[0:3], 0 offen offset:160
+; CHECK-NEXT:    buffer_load_dword v67, v2, s[0:3], 0 offen offset:156
+; CHECK-NEXT:    buffer_load_dword v66, v2, s[0:3], 0 offen offset:152
+; CHECK-NEXT:    buffer_load_dword v65, v2, s[0:3], 0 offen offset:148
+; CHECK-NEXT:    buffer_load_dword v64, v2, s[0:3], 0 offen offset:144
+; CHECK-NEXT:    buffer_load_dword v71, v2, s[0:3], 0 offen offset:140
+; CHECK-NEXT:    buffer_load_dword v70, v2, s[0:3], 0 offen offset:136
+; CHECK-NEXT:    buffer_load_dword v69, v2, s[0:3], 0 offen offset:132
+; CHECK-NEXT:    buffer_load_dword v68, v2, s[0:3], 0 offen offset:128
+; CHECK-NEXT:    buffer_load_dword v83, v2, s[0:3], 0 offen offset:92
+; CHECK-NEXT:    buffer_load_dword v82, v2, s[0:3], 0 offen offset:88
+; CHECK-NEXT:    buffer_load_dword v81, v2, s[0:3], 0 offen offset:84
+; CHECK-NEXT:    buffer_load_dword v80, v2, s[0:3], 0 offen offset:80
+; CHECK-NEXT:    buffer_load_dword v87, v2, s[0:3], 0 offen offset:76
+; CHECK-NEXT:    buffer_load_dword v86, v2, s[0:3], 0 offen offset:72
+; CHECK-NEXT:    buffer_load_dword v85, v2, s[0:3], 0 offen offset:68
+; CHECK-NEXT:    buffer_load_dword v84, v2, s[0:3], 0 offen offset:64
+; CHECK-NEXT:    buffer_load_dword v96, v2, s[0:3], 0 offen
+; CHECK-NEXT:    buffer_load_dword v97, v2, s[0:3], 0 offen offset:4
+; CHECK-NEXT:    buffer_load_dword v98, v2, s[0:3], 0 offen offset:8
+; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
+; CHECK-NEXT:    buffer_load_dword v99, v2, s[0:3], 0 offen offset:12
+; CHECK-NEXT:    v_add_co_u32 v100, vcc_lo, v0, s4
+; CHECK-NEXT:    s_add_u32 s4, s4, 0x100
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo
+; CHECK-NEXT:    s_addc_u32 s5, s5, 0
+; CHECK-NEXT:    v_add_nc_u32_e32 v2, 0x100, v2
+; CHECK-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; CHECK-NEXT:    s_waitcnt vmcnt(41)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[23:26] offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(37)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[27:30] offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(33)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[31:34] offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(29)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[35:38] offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(25)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[48:51] offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(21)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[52:55] offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(17)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[64:67] offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(13)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[68:71] offset:128
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[15:18] offset:112
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[19:22] offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(9)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[80:83] offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(5)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[84:87] offset:64
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[11:14] offset:48
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[7:10] offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[3:6] offset:16
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[96:99]
+; CHECK-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; CHECK-NEXT:    s_cbranch_vccnz .LBB4_1
+; CHECK-NEXT:  ; %bb.2: ; %memcpy-split
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+;
+; ALIGNED-LABEL: memcpy_p0_p5_sz2048:
+; ALIGNED:       ; %bb.0: ; %entry
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v105, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v107, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v108, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v109, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v111, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v120, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v121, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v122, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v127, off, s[0:3], s32 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1224 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1228 ; 4-byte Folded Spill
+; ALIGNED-NEXT:  .LBB4_1: ; %load-store-loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    s_clause 0x39
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:20
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:21
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:22
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:23
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:24
+; ALIGNED-NEXT:    buffer_load_ubyte v10, v2, s[0:3], 0 offen offset:25
+; ALIGNED-NEXT:    buffer_load_ubyte v12, v2, s[0:3], 0 offen offset:26
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:30
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:31
+; ALIGNED-NEXT:    buffer_load_ubyte v14, v2, s[0:3], 0 offen offset:32
+; ALIGNED-NEXT:    buffer_load_ubyte v15, v2, s[0:3], 0 offen offset:33
+; ALIGNED-NEXT:    buffer_load_ubyte v17, v2, s[0:3], 0 offen offset:34
+; ALIGNED-NEXT:    buffer_load_ubyte v9, v2, s[0:3], 0 offen offset:29
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:28
+; ALIGNED-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:27
+; ALIGNED-NEXT:    buffer_load_ubyte v19, v2, s[0:3], 0 offen offset:35
+; ALIGNED-NEXT:    buffer_load_ubyte v13, v2, s[0:3], 0 offen offset:36
+; ALIGNED-NEXT:    buffer_load_ubyte v16, v2, s[0:3], 0 offen offset:37
+; ALIGNED-NEXT:    buffer_load_ubyte v18, v2, s[0:3], 0 offen offset:38
+; ALIGNED-NEXT:    buffer_load_ubyte v20, v2, s[0:3], 0 offen offset:39
+; ALIGNED-NEXT:    buffer_load_ubyte v22, v2, s[0:3], 0 offen offset:40
+; ALIGNED-NEXT:    buffer_load_ubyte v23, v2, s[0:3], 0 offen offset:41
+; ALIGNED-NEXT:    buffer_load_ubyte v25, v2, s[0:3], 0 offen offset:42
+; ALIGNED-NEXT:    buffer_load_ubyte v28, v2, s[0:3], 0 offen offset:43
+; ALIGNED-NEXT:    buffer_load_ubyte v21, v2, s[0:3], 0 offen offset:44
+; ALIGNED-NEXT:    buffer_load_ubyte v24, v2, s[0:3], 0 offen offset:45
+; ALIGNED-NEXT:    buffer_load_ubyte v26, v2, s[0:3], 0 offen offset:46
+; ALIGNED-NEXT:    buffer_load_ubyte v27, v2, s[0:3], 0 offen offset:47
+; ALIGNED-NEXT:    buffer_load_ubyte v30, v2, s[0:3], 0 offen offset:48
+; ALIGNED-NEXT:    buffer_load_ubyte v31, v2, s[0:3], 0 offen offset:49
+; ALIGNED-NEXT:    buffer_load_ubyte v33, v2, s[0:3], 0 offen offset:50
+; ALIGNED-NEXT:    buffer_load_ubyte v34, v2, s[0:3], 0 offen offset:51
+; ALIGNED-NEXT:    buffer_load_ubyte v32, v2, s[0:3], 0 offen offset:52
+; ALIGNED-NEXT:    buffer_load_ubyte v37, v2, s[0:3], 0 offen offset:53
+; ALIGNED-NEXT:    buffer_load_ubyte v35, v2, s[0:3], 0 offen offset:54
+; ALIGNED-NEXT:    buffer_load_ubyte v36, v2, s[0:3], 0 offen offset:55
+; ALIGNED-NEXT:    buffer_load_ubyte v48, v2, s[0:3], 0 offen offset:56
+; ALIGNED-NEXT:    buffer_load_ubyte v51, v2, s[0:3], 0 offen offset:57
+; ALIGNED-NEXT:    buffer_load_ubyte v52, v2, s[0:3], 0 offen offset:58
+; ALIGNED-NEXT:    buffer_load_ubyte v38, v2, s[0:3], 0 offen offset:60
+; ALIGNED-NEXT:    buffer_load_ubyte v50, v2, s[0:3], 0 offen offset:61
+; ALIGNED-NEXT:    buffer_load_ubyte v39, v2, s[0:3], 0 offen offset:62
+; ALIGNED-NEXT:    buffer_load_ubyte v49, v2, s[0:3], 0 offen offset:63
+; ALIGNED-NEXT:    buffer_load_ubyte v29, v2, s[0:3], 0 offen offset:64
+; ALIGNED-NEXT:    buffer_load_ubyte v55, v2, s[0:3], 0 offen offset:65
+; ALIGNED-NEXT:    buffer_load_ubyte v66, v2, s[0:3], 0 offen offset:66
+; ALIGNED-NEXT:    buffer_load_ubyte v53, v2, s[0:3], 0 offen offset:59
+; ALIGNED-NEXT:    buffer_load_ubyte v67, v2, s[0:3], 0 offen offset:67
+; ALIGNED-NEXT:    buffer_load_ubyte v54, v2, s[0:3], 0 offen offset:68
+; ALIGNED-NEXT:    buffer_load_ubyte v64, v2, s[0:3], 0 offen offset:69
+; ALIGNED-NEXT:    buffer_load_ubyte v65, v2, s[0:3], 0 offen offset:70
+; ALIGNED-NEXT:    buffer_load_ubyte v68, v2, s[0:3], 0 offen offset:71
+; ALIGNED-NEXT:    buffer_load_ubyte v69, v2, s[0:3], 0 offen offset:76
+; ALIGNED-NEXT:    buffer_load_ubyte v70, v2, s[0:3], 0 offen offset:77
+; ALIGNED-NEXT:    buffer_load_ubyte v71, v2, s[0:3], 0 offen offset:78
+; ALIGNED-NEXT:    buffer_load_ubyte v80, v2, s[0:3], 0 offen offset:79
+; ALIGNED-NEXT:    buffer_load_ubyte v127, v2, s[0:3], 0 offen offset:19
+; ALIGNED-NEXT:    buffer_load_ubyte v81, v2, s[0:3], 0 offen offset:75
+; ALIGNED-NEXT:    s_waitcnt vmcnt(57)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(56)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(55)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(54)
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(53)
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:468 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(52)
+; ALIGNED-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(51)
+; ALIGNED-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:492 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(50)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:476 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(49)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(48)
+; ALIGNED-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:504 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
+; ALIGNED-NEXT:    s_waitcnt vmcnt(45)
+; ALIGNED-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:472 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(44)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(43)
+; ALIGNED-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:488 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v9, 8, v5
+; ALIGNED-NEXT:    s_waitcnt vmcnt(41)
+; ALIGNED-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:496 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v8, 8, v6
+; ALIGNED-NEXT:    v_lshl_or_b32 v5, v10, 8, v7
+; ALIGNED-NEXT:    v_lshl_or_b32 v6, v11, 8, v12
+; ALIGNED-NEXT:    v_lshl_or_b32 v7, v15, 8, v14
+; ALIGNED-NEXT:    v_lshl_or_b32 v8, v19, 8, v17
+; ALIGNED-NEXT:    s_waitcnt vmcnt(40)
+; ALIGNED-NEXT:    v_lshl_or_b32 v9, v16, 8, v13
+; ALIGNED-NEXT:    s_waitcnt vmcnt(38)
+; ALIGNED-NEXT:    v_lshl_or_b32 v10, v20, 8, v18
+; ALIGNED-NEXT:    s_waitcnt vmcnt(36)
+; ALIGNED-NEXT:    v_lshl_or_b32 v11, v23, 8, v22
+; ALIGNED-NEXT:    s_waitcnt vmcnt(34)
+; ALIGNED-NEXT:    v_lshl_or_b32 v12, v28, 8, v25
+; ALIGNED-NEXT:    s_waitcnt vmcnt(32)
+; ALIGNED-NEXT:    v_lshl_or_b32 v13, v24, 8, v21
+; ALIGNED-NEXT:    s_waitcnt vmcnt(30)
+; ALIGNED-NEXT:    v_lshl_or_b32 v14, v27, 8, v26
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v6, 16, v5
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v8, 16, v7
+; ALIGNED-NEXT:    v_lshl_or_b32 v5, v10, 16, v9
+; ALIGNED-NEXT:    v_lshl_or_b32 v6, v12, 16, v11
+; ALIGNED-NEXT:    v_lshl_or_b32 v7, v14, 16, v13
+; ALIGNED-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:508 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(28)
+; ALIGNED-NEXT:    v_lshl_or_b32 v15, v31, 8, v30
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:516 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(26)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v34, 8, v33
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:532 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(24)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v37, 8, v32
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:536 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(22)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v36, 8, v35
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:576 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(17)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v50, 8, v38
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:588 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(15)
+; ALIGNED-NEXT:    v_lshl_or_b32 v5, v49, 8, v39
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:604 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v6, v51, 8, v48
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:616 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(11)
+; ALIGNED-NEXT:    v_lshl_or_b32 v7, v53, 8, v52
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v0, 16, v15
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 16, v1
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v5, 16, v4
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:85
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v7, 16, v6
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:652 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v55, 8, v29
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:656 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(11)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v67, 8, v66
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:664 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(9)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v64, 8, v54
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:668 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v68, 8, v65
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:86
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:82
+; ALIGNED-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:524 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v70, 8, v69
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:83
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:74
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:724 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v80, 8, v71
+; ALIGNED-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:528 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:500 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:512 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:520 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:73
+; ALIGNED-NEXT:    buffer_store_dword v22, off, s[0:3], s32 offset:548 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v23, off, s[0:3], s32 offset:552 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v25, off, s[0:3], s32 offset:564 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:728 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:72
+; ALIGNED-NEXT:    buffer_store_dword v28, off, s[0:3], s32 offset:568 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v21, off, s[0:3], s32 offset:540 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v24, off, s[0:3], s32 offset:544 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v26, off, s[0:3], s32 offset:556 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v27, off, s[0:3], s32 offset:560 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:572 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:580 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:596 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:592 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:584 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:612 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:600 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:608 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v38, off, s[0:3], s32 offset:620 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v50, off, s[0:3], s32 offset:636 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:624 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v49, off, s[0:3], s32 offset:632 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v48, off, s[0:3], s32 offset:628 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v51, off, s[0:3], s32 offset:640 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:648 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:644 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v29, off, s[0:3], s32 offset:660 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v55, off, s[0:3], s32 offset:672 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v66, off, s[0:3], s32 offset:692 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v67, off, s[0:3], s32 offset:684 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v54, off, s[0:3], s32 offset:676 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v64, off, s[0:3], s32 offset:680 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v65, off, s[0:3], s32 offset:688 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v68, off, s[0:3], s32 offset:696 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v69, off, s[0:3], s32 offset:700 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v70, off, s[0:3], s32 offset:704 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v71, off, s[0:3], s32 offset:708 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v80, off, s[0:3], s32 offset:716 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(8)
+; ALIGNED-NEXT:    buffer_store_dword v127, off, s[0:3], s32 offset:1152 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    buffer_store_dword v81, off, s[0:3], s32 offset:736 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:87
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:768 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:772 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:764 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:756 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:740 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:732 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:720 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v81, 8, v3
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:84
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:81
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:744 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:80
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:776 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:760 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:752 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:748 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v7
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:98
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v8, 8, v6
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:102
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:103
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:780 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:94
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:95
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:93
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:91
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:788 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:92
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:852 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:856 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:796 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:800 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:792 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:784 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v5, 8, v3
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:90
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:816 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:101
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:89
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:808 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:88
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:820 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:848 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:812 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:804 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:99
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:100
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:844 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:97
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:824 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:96
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:836 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:840 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:832 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:828 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v7
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:114
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v8, 8, v6
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:118
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:119
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:860 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:110
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:111
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:109
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:107
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:868 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:108
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:932 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:936 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:876 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:880 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:872 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:864 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v5, 8, v3
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:106
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:896 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:117
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:105
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:888 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:104
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:900 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:928 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:892 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:884 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:115
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:116
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:924 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:113
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:904 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:916 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:920 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:912 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:908 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v7
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:130
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v8, 8, v6
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:134
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:135
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:940 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:126
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:127
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:125
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:123
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:948 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:124
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:1012 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:1016 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:956 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:960 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:952 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:944 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v5, 8, v3
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:122
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:976 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:133
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:121
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:968 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:120
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:980 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1008 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:972 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:964 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:131
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:132
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:1004 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:129
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:984 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:128
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:996 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1000 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:992 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:988 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v7
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v8, 8, v6
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1020 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:142
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:143
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:141
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:139
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:140
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1036 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1040 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1032 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1024 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v5, 8, v3
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:138
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:1056 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:137
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1048 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:136
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1060 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1052 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1044 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:145
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1064 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:144
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1084 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1072 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x5
+; ALIGNED-NEXT:    buffer_load_ubyte v125, v2, s[0:3], 0 offen offset:146
+; ALIGNED-NEXT:    buffer_load_ubyte v126, v2, s[0:3], 0 offen offset:147
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:148
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:149
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:150
+; ALIGNED-NEXT:    buffer_load_ubyte v123, v2, s[0:3], 0 offen offset:151
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v126, 8, v125
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1104 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:1112 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1116 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v123, 8, v5
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1124 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 8, v3
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1132 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v121, v2, s[0:3], 0 offen offset:156
+; ALIGNED-NEXT:    buffer_load_ubyte v109, v2, s[0:3], 0 offen offset:157
+; ALIGNED-NEXT:    buffer_load_ubyte v108, v2, s[0:3], 0 offen offset:158
+; ALIGNED-NEXT:    buffer_load_ubyte v107, v2, s[0:3], 0 offen offset:159
+; ALIGNED-NEXT:    buffer_load_ubyte v106, v2, s[0:3], 0 offen offset:155
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v109, 8, v121
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v107, 8, v108
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1136 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v105, v2, s[0:3], 0 offen offset:152
+; ALIGNED-NEXT:    buffer_load_ubyte v93, v2, s[0:3], 0 offen offset:153
+; ALIGNED-NEXT:    buffer_load_ubyte v91, v2, s[0:3], 0 offen offset:154
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v93, 8, v105
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v106, 8, v91
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1144 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v89, v2, s[0:3], 0 offen offset:160
+; ALIGNED-NEXT:    buffer_load_ubyte v78, v2, s[0:3], 0 offen offset:161
+; ALIGNED-NEXT:    buffer_load_ubyte v73, v2, s[0:3], 0 offen offset:162
+; ALIGNED-NEXT:    buffer_load_ubyte v74, v2, s[0:3], 0 offen offset:163
+; ALIGNED-NEXT:    buffer_load_ubyte v79, v2, s[0:3], 0 offen offset:164
+; ALIGNED-NEXT:    buffer_load_ubyte v75, v2, s[0:3], 0 offen offset:165
+; ALIGNED-NEXT:    buffer_load_ubyte v76, v2, s[0:3], 0 offen offset:166
+; ALIGNED-NEXT:    buffer_load_ubyte v72, v2, s[0:3], 0 offen offset:167
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v78, 8, v89
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v74, 8, v73
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v72, 8, v76
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1156 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v75, 8, v79
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1160 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v63, v2, s[0:3], 0 offen offset:172
+; ALIGNED-NEXT:    buffer_load_ubyte v61, v2, s[0:3], 0 offen offset:173
+; ALIGNED-NEXT:    buffer_load_ubyte v62, v2, s[0:3], 0 offen offset:174
+; ALIGNED-NEXT:    buffer_load_ubyte v60, v2, s[0:3], 0 offen offset:175
+; ALIGNED-NEXT:    buffer_load_ubyte v57, v2, s[0:3], 0 offen offset:171
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v61, 8, v63
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v60, 8, v62
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1164 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v59, v2, s[0:3], 0 offen offset:168
+; ALIGNED-NEXT:    buffer_load_ubyte v56, v2, s[0:3], 0 offen offset:169
+; ALIGNED-NEXT:    buffer_load_ubyte v47, v2, s[0:3], 0 offen offset:170
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v56, 8, v59
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v57, 8, v47
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1168 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v46, v2, s[0:3], 0 offen offset:176
+; ALIGNED-NEXT:    buffer_load_ubyte v43, v2, s[0:3], 0 offen offset:177
+; ALIGNED-NEXT:    buffer_load_ubyte v119, v2, s[0:3], 0 offen offset:178
+; ALIGNED-NEXT:    buffer_load_ubyte v40, v2, s[0:3], 0 offen offset:179
+; ALIGNED-NEXT:    buffer_load_ubyte v45, v2, s[0:3], 0 offen offset:180
+; ALIGNED-NEXT:    buffer_load_ubyte v41, v2, s[0:3], 0 offen offset:181
+; ALIGNED-NEXT:    buffer_load_ubyte v42, v2, s[0:3], 0 offen offset:182
+; ALIGNED-NEXT:    buffer_load_ubyte v118, v2, s[0:3], 0 offen offset:183
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v43, 8, v46
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v40, 8, v119
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v118, 8, v42
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1172 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v41, 8, v45
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1176 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v117, v2, s[0:3], 0 offen offset:188
+; ALIGNED-NEXT:    buffer_load_ubyte v115, v2, s[0:3], 0 offen offset:189
+; ALIGNED-NEXT:    buffer_load_ubyte v116, v2, s[0:3], 0 offen offset:190
+; ALIGNED-NEXT:    buffer_load_ubyte v114, v2, s[0:3], 0 offen offset:191
+; ALIGNED-NEXT:    buffer_load_ubyte v112, v2, s[0:3], 0 offen offset:187
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v115, 8, v117
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v114, 8, v116
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1180 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v113, v2, s[0:3], 0 offen offset:184
+; ALIGNED-NEXT:    buffer_load_ubyte v103, v2, s[0:3], 0 offen offset:185
+; ALIGNED-NEXT:    buffer_load_ubyte v102, v2, s[0:3], 0 offen offset:186
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v103, 8, v113
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v112, 8, v102
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1184 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v100, v2, s[0:3], 0 offen offset:192
+; ALIGNED-NEXT:    buffer_load_ubyte v98, v2, s[0:3], 0 offen offset:193
+; ALIGNED-NEXT:    buffer_load_ubyte v87, v2, s[0:3], 0 offen offset:194
+; ALIGNED-NEXT:    buffer_load_ubyte v86, v2, s[0:3], 0 offen offset:195
+; ALIGNED-NEXT:    buffer_load_ubyte v99, v2, s[0:3], 0 offen offset:196
+; ALIGNED-NEXT:    buffer_load_ubyte v97, v2, s[0:3], 0 offen offset:197
+; ALIGNED-NEXT:    buffer_load_ubyte v96, v2, s[0:3], 0 offen offset:198
+; ALIGNED-NEXT:    buffer_load_ubyte v85, v2, s[0:3], 0 offen offset:199
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v98, 8, v100
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v86, 8, v87
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v85, 8, v96
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1188 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v97, 8, v99
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1192 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v83, v2, s[0:3], 0 offen offset:204
+; ALIGNED-NEXT:    buffer_load_ubyte v81, v2, s[0:3], 0 offen offset:205
+; ALIGNED-NEXT:    buffer_load_ubyte v82, v2, s[0:3], 0 offen offset:206
+; ALIGNED-NEXT:    buffer_load_ubyte v80, v2, s[0:3], 0 offen offset:207
+; ALIGNED-NEXT:    buffer_load_ubyte v71, v2, s[0:3], 0 offen offset:203
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v81, 8, v83
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v80, 8, v82
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1196 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v70, v2, s[0:3], 0 offen offset:200
+; ALIGNED-NEXT:    buffer_load_ubyte v69, v2, s[0:3], 0 offen offset:201
+; ALIGNED-NEXT:    buffer_load_ubyte v68, v2, s[0:3], 0 offen offset:202
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v69, 8, v70
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v71, 8, v68
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1200 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v67, v2, s[0:3], 0 offen offset:212
+; ALIGNED-NEXT:    buffer_load_ubyte v54, v2, s[0:3], 0 offen offset:213
+; ALIGNED-NEXT:    buffer_load_ubyte v65, v2, s[0:3], 0 offen offset:214
+; ALIGNED-NEXT:    buffer_load_ubyte v52, v2, s[0:3], 0 offen offset:215
+; ALIGNED-NEXT:    buffer_load_ubyte v55, v2, s[0:3], 0 offen offset:211
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v54, 8, v67
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v52, 8, v65
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1204 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v66, v2, s[0:3], 0 offen offset:216
+; ALIGNED-NEXT:    buffer_load_ubyte v53, v2, s[0:3], 0 offen offset:217
+; ALIGNED-NEXT:    buffer_load_ubyte v49, v2, s[0:3], 0 offen offset:218
+; ALIGNED-NEXT:    buffer_load_ubyte v48, v2, s[0:3], 0 offen offset:219
+; ALIGNED-NEXT:    buffer_load_ubyte v64, v2, s[0:3], 0 offen offset:220
+; ALIGNED-NEXT:    buffer_load_ubyte v51, v2, s[0:3], 0 offen offset:221
+; ALIGNED-NEXT:    buffer_load_ubyte v50, v2, s[0:3], 0 offen offset:222
+; ALIGNED-NEXT:    buffer_load_ubyte v39, v2, s[0:3], 0 offen offset:223
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v53, 8, v66
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v48, 8, v49
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v51, 8, v64
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v39, 8, v50
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1208 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1212 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v38, v2, s[0:3], 0 offen offset:208
+; ALIGNED-NEXT:    buffer_load_ubyte v36, v2, s[0:3], 0 offen offset:209
+; ALIGNED-NEXT:    buffer_load_ubyte v37, v2, s[0:3], 0 offen offset:210
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v36, 8, v38
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v55, 8, v37
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1216 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v35, v2, s[0:3], 0 offen offset:224
+; ALIGNED-NEXT:    buffer_load_ubyte v33, v2, s[0:3], 0 offen offset:225
+; ALIGNED-NEXT:    buffer_load_ubyte v29, v2, s[0:3], 0 offen offset:226
+; ALIGNED-NEXT:    buffer_load_ubyte v30, v2, s[0:3], 0 offen offset:227
+; ALIGNED-NEXT:    buffer_load_ubyte v34, v2, s[0:3], 0 offen offset:228
+; ALIGNED-NEXT:    buffer_load_ubyte v31, v2, s[0:3], 0 offen offset:229
+; ALIGNED-NEXT:    buffer_load_ubyte v32, v2, s[0:3], 0 offen offset:230
+; ALIGNED-NEXT:    buffer_load_ubyte v28, v2, s[0:3], 0 offen offset:231
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v33, 8, v35
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v30, 8, v29
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v31, 8, v34
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v28, 8, v32
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1220 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x17
+; ALIGNED-NEXT:    buffer_load_ubyte v27, v2, s[0:3], 0 offen offset:236
+; ALIGNED-NEXT:    buffer_load_ubyte v25, v2, s[0:3], 0 offen offset:237
+; ALIGNED-NEXT:    buffer_load_ubyte v26, v2, s[0:3], 0 offen offset:238
+; ALIGNED-NEXT:    buffer_load_ubyte v24, v2, s[0:3], 0 offen offset:239
+; ALIGNED-NEXT:    buffer_load_ubyte v23, v2, s[0:3], 0 offen offset:235
+; ALIGNED-NEXT:    buffer_load_ubyte v22, v2, s[0:3], 0 offen offset:232
+; ALIGNED-NEXT:    buffer_load_ubyte v21, v2, s[0:3], 0 offen offset:233
+; ALIGNED-NEXT:    buffer_load_ubyte v20, v2, s[0:3], 0 offen offset:234
+; ALIGNED-NEXT:    buffer_load_ubyte v19, v2, s[0:3], 0 offen offset:240
+; ALIGNED-NEXT:    buffer_load_ubyte v17, v2, s[0:3], 0 offen offset:241
+; ALIGNED-NEXT:    buffer_load_ubyte v13, v2, s[0:3], 0 offen offset:242
+; ALIGNED-NEXT:    buffer_load_ubyte v14, v2, s[0:3], 0 offen offset:243
+; ALIGNED-NEXT:    buffer_load_ubyte v18, v2, s[0:3], 0 offen offset:244
+; ALIGNED-NEXT:    buffer_load_ubyte v15, v2, s[0:3], 0 offen offset:245
+; ALIGNED-NEXT:    buffer_load_ubyte v16, v2, s[0:3], 0 offen offset:246
+; ALIGNED-NEXT:    buffer_load_ubyte v12, v2, s[0:3], 0 offen offset:247
+; ALIGNED-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:252
+; ALIGNED-NEXT:    buffer_load_ubyte v9, v2, s[0:3], 0 offen offset:253
+; ALIGNED-NEXT:    buffer_load_ubyte v10, v2, s[0:3], 0 offen offset:254
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:255
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:251
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:248
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:249
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:250
+; ALIGNED-NEXT:    v_lshl_or_b32 v124, v4, 16, v3
+; ALIGNED-NEXT:    s_clause 0x5
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen
+; ALIGNED-NEXT:    buffer_load_ubyte v94, v2, s[0:3], 0 offen offset:2
+; ALIGNED-NEXT:    buffer_load_ubyte v88, v2, s[0:3], 0 offen offset:4
+; ALIGNED-NEXT:    buffer_load_ubyte v90, v2, s[0:3], 0 offen offset:5
+; ALIGNED-NEXT:    buffer_load_ubyte v92, v2, s[0:3], 0 offen offset:6
+; ALIGNED-NEXT:    buffer_load_ubyte v95, v2, s[0:3], 0 offen offset:7
+; ALIGNED-NEXT:    s_waitcnt vmcnt(28)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v25, 8, v27
+; ALIGNED-NEXT:    s_waitcnt vmcnt(26)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v24, 8, v26
+; ALIGNED-NEXT:    s_waitcnt vmcnt(14)
+; ALIGNED-NEXT:    v_lshl_or_b32 v44, v12, 8, v16
+; ALIGNED-NEXT:    s_waitcnt vmcnt(10)
+; ALIGNED-NEXT:    v_lshl_or_b32 v58, v8, 8, v10
+; ALIGNED-NEXT:    v_lshl_or_b32 v104, v4, 16, v3
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v21, 8, v22
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v23, 8, v20
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:1088 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:1096 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:1100 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v77, v4, 16, v3
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v17, 8, v19
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v14, 8, v13
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:1108 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v101, v4, 16, v3
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v15, 8, v18
+; ALIGNED-NEXT:    v_lshl_or_b32 v84, v44, 16, v4
+; ALIGNED-NEXT:    v_lshl_or_b32 v44, v9, 8, v11
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v58, 16, v44
+; ALIGNED-NEXT:    v_lshl_or_b32 v44, v5, 8, v6
+; ALIGNED-NEXT:    v_lshl_or_b32 v58, v7, 8, v1
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v58, 16, v44
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v44, v2, s[0:3], 0 offen offset:1
+; ALIGNED-NEXT:    buffer_load_ubyte v58, v2, s[0:3], 0 offen offset:3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1068 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:1092 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:1076 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:1080 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v44, v44, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v58, v58, 8, v94
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v58, 16, v44
+; ALIGNED-NEXT:    v_lshl_or_b32 v44, v90, 8, v88
+; ALIGNED-NEXT:    v_lshl_or_b32 v58, v95, 8, v92
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1120 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v58, 16, v44
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1128 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v122, v2, s[0:3], 0 offen offset:12
+; ALIGNED-NEXT:    buffer_load_ubyte v111, v2, s[0:3], 0 offen offset:13
+; ALIGNED-NEXT:    buffer_load_ubyte v120, v2, s[0:3], 0 offen offset:14
+; ALIGNED-NEXT:    buffer_load_ubyte v110, v2, s[0:3], 0 offen offset:15
+; ALIGNED-NEXT:    buffer_load_ubyte v94, v2, s[0:3], 0 offen offset:11
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v44, v111, 8, v122
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v58, v110, 8, v120
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v58, 16, v44
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1140 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v95, v2, s[0:3], 0 offen offset:8
+; ALIGNED-NEXT:    buffer_load_ubyte v92, v2, s[0:3], 0 offen offset:9
+; ALIGNED-NEXT:    buffer_load_ubyte v90, v2, s[0:3], 0 offen offset:10
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v44, v92, 8, v95
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v58, v94, 8, v90
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v58, 16, v44
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1148 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v88, v2, s[0:3], 0 offen offset:16
+; ALIGNED-NEXT:    buffer_load_ubyte v44, v2, s[0:3], 0 offen offset:18
+; ALIGNED-NEXT:    buffer_load_ubyte v58, v2, s[0:3], 0 offen offset:17
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:232
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:236
+; ALIGNED-NEXT:    buffer_store_dword v84, off, s[0:3], s32 offset:228
+; ALIGNED-NEXT:    buffer_store_dword v101, off, s[0:3], s32 offset:224
+; ALIGNED-NEXT:    v_add_nc_u32_e32 v2, 0x100, v2
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v127, 8, v44
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v127, v58, 8, v88
+; ALIGNED-NEXT:    v_lshl_or_b32 v127, v0, 16, v127
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1228 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_add_co_u32 v3, vcc_lo, v0, s4
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1224 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0x100
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, s5, v0, vcc_lo
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v1 offset:250
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v7 offset:251
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v5 offset:249
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v8 offset:255
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v9 offset:253
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v10 offset:254
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v11 offset:252
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v6 offset:248
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v13 offset:242
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v14 offset:243
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v17 offset:241
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v12 offset:247
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v15 offset:245
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v16 offset:246
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v18 offset:244
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v19 offset:240
+; ALIGNED-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:248
+; ALIGNED-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:252
+; ALIGNED-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:244
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1220 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, 0
+; ALIGNED-NEXT:    v_cmp_gt_u64_e64 s6, 0x800, s[4:5]
+; ALIGNED-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:240
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v20 offset:234
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v23 offset:235
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v21 offset:233
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v24 offset:239
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v25 offset:237
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v26 offset:238
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v27 offset:236
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v22 offset:232
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v29 offset:226
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v30 offset:227
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v33 offset:225
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v28 offset:231
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v31 offset:229
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v32 offset:230
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v34 offset:228
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v35 offset:224
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1216 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:192
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1212 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:204
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1208 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:200
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1204 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:196
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v54 offset:213
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v52 offset:215
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v36 offset:209
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v55 offset:211
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v37 offset:210
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v65 offset:214
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v67 offset:212
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v49 offset:218
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v48 offset:219
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v53 offset:217
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v39 offset:223
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v51 offset:221
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v50 offset:222
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v64 offset:220
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v66 offset:216
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v38 offset:208
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1200 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:216
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1196 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:220
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1192 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:212
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1188 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:208
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v68 offset:202
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v71 offset:203
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v69 offset:201
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v80 offset:207
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v81 offset:205
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v82 offset:206
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v83 offset:204
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v70 offset:200
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v87 offset:194
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v86 offset:195
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v98 offset:193
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v85 offset:199
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v97 offset:197
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v96 offset:198
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v99 offset:196
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v100 offset:192
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1184 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:296
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1180 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:300
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1176 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:292
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1172 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:288
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v102 offset:186
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v112 offset:187
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v103 offset:185
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v114 offset:191
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v115 offset:189
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v116 offset:190
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v117 offset:188
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v113 offset:184
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v119 offset:178
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v40 offset:179
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v43 offset:177
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v118 offset:183
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v41 offset:181
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v42 offset:182
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v45 offset:180
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v46 offset:176
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1168 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:312
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1164 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:316
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1160 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:308
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1156 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:304
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v47 offset:170
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v57 offset:171
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v56 offset:169
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v60 offset:175
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v61 offset:173
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v62 offset:174
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v63 offset:172
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v59 offset:168
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v73 offset:162
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v74 offset:163
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v78 offset:161
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v72 offset:167
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v75 offset:165
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v76 offset:166
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v79 offset:164
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v89 offset:160
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1144 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:264
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1136 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:268
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1132 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:260
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1124 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:256
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v91 offset:154
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v106 offset:155
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v93 offset:153
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v107 offset:159
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v109 offset:157
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v108 offset:158
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v121 offset:156
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v105 offset:152
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v125 offset:146
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v126 offset:147
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1084 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:145
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v123 offset:151
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1112 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:149
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1116 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:150
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1104 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:148
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1072 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:144
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1064 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:280
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1048 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:284
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:276
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1020 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:272
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1060 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:138
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1056 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:139
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1052 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:137
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1040 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:143
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1032 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:141
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1036 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:142
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1024 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:140
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1044 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:136
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1004 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:130
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:996 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:131
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:992 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:129
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1016 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:135
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1008 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:133
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1012 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:134
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1000 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:132
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:128
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:360
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:968 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:364
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:948 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:356
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:940 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:352
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:980 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:122
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:976 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:123
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:972 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:121
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:960 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:127
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:952 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:125
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:956 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:126
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:944 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:124
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:964 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:120
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:924 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:114
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:916 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:115
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:912 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:113
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:936 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:119
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:928 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:117
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:932 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:118
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:920 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:116
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:908 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:112
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:904 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:376
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:888 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:380
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:868 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:372
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:860 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:368
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:900 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:106
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:896 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:107
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:892 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:105
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:880 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:111
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:872 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:109
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:876 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:110
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:864 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:108
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:884 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:104
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:844 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:98
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:836 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:99
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:832 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:97
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:856 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:103
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:848 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:101
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:852 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:102
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:840 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:100
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:828 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:96
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:824 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:328
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:808 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:332
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:788 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:324
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:780 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:320
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:820 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:90
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:816 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:91
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:812 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:89
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:800 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:95
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:792 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:93
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:796 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:94
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:784 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:92
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:804 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:88
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:764 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:82
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:756 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:83
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:752 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:81
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:776 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:87
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:768 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:85
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:772 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:86
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:760 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:84
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:748 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:80
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:744 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:344
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:728 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:348
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:724 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:340
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:336
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:740 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:74
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:736 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:75
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:732 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:73
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:716 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:79
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:704 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:77
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:708 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:78
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:700 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:76
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:720 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:72
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:692 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:66
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:684 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:67
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:672 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:65
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:696 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:71
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:680 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:69
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:688 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:70
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:676 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:68
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:660 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:64
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:668 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:424
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:664 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:428
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:656 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:420
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:652 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:416
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:636 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:61
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:648 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:58
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:644 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:59
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:640 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:57
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:632 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:63
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:624 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:62
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:620 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:60
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:628 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:56
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:612 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:53
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:596 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:50
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:592 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:51
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:580 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:49
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:608 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:55
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:600 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:54
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:584 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:52
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:572 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:48
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:616 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:444
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:604 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:440
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:588 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:436
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:576 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:432
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:568 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:43
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:564 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:42
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:552 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:41
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:548 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:40
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:560 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:47
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:556 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:46
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:544 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:45
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:540 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:44
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:528 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:35
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:524 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:34
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:508 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:33
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:504 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:32
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:520 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:39
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:512 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:38
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:500 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:37
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:496 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:36
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:536 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:392
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:532 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:396
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:516 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:388
+; ALIGNED-NEXT:    buffer_store_dword v127, off, s[0:3], s32 offset:384
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:492 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:26
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:488 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:27
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:484 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:25
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:480 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:31
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:472 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:29
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:476 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:30
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:28
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:468 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:24
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v44 offset:18
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1152 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:19
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v58 offset:17
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:23
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:21
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:22
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:20
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v88 offset:16
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1148 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:408
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1140 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:412
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1128 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:404
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1120 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:400
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v90 offset:10
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v94 offset:11
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v111 offset:13
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v92 offset:9
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v110 offset:15
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v120 offset:14
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v122 offset:12
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v95 offset:8
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1092 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:2
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1080 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:3
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1076 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:1
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1108 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:7
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1096 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:5
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1100 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:6
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1088 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:4
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1068 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0
+; ALIGNED-NEXT:    s_cbranch_vccnz .LBB4_1
+; ALIGNED-NEXT:  ; %bb.2: ; %memcpy-split
+; ALIGNED-NEXT:    s_clause 0x2f
+; ALIGNED-NEXT:    buffer_load_dword v127, off, s[0:3], s32
+; ALIGNED-NEXT:    buffer_load_dword v126, off, s[0:3], s32 offset:4
+; ALIGNED-NEXT:    buffer_load_dword v125, off, s[0:3], s32 offset:8
+; ALIGNED-NEXT:    buffer_load_dword v124, off, s[0:3], s32 offset:12
+; ALIGNED-NEXT:    buffer_load_dword v123, off, s[0:3], s32 offset:16
+; ALIGNED-NEXT:    buffer_load_dword v122, off, s[0:3], s32 offset:20
+; ALIGNED-NEXT:    buffer_load_dword v121, off, s[0:3], s32 offset:24
+; ALIGNED-NEXT:    buffer_load_dword v120, off, s[0:3], s32 offset:28
+; ALIGNED-NEXT:    buffer_load_dword v111, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    buffer_load_dword v110, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_load_dword v109, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_load_dword v108, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    buffer_load_dword v107, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    buffer_load_dword v106, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_load_dword v105, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_load_dword v104, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    buffer_load_dword v95, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    s_setpc_b64 s[30:31]
+;
+; UNROLL3-LABEL: memcpy_p0_p5_sz2048:
+; UNROLL3:       ; %bb.0: ; %entry
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    v_mov_b32_e32 v3, v2
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0
+; UNROLL3-NEXT:    s_inst_prefetch 0x1
+; UNROLL3-NEXT:    .p2align 6
+; UNROLL3-NEXT:  .LBB4_1: ; %load-store-loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    s_clause 0xb
+; UNROLL3-NEXT:    buffer_load_dword v4, v3, s[0:3], 0 offen
+; UNROLL3-NEXT:    buffer_load_dword v5, v3, s[0:3], 0 offen offset:4
+; UNROLL3-NEXT:    buffer_load_dword v6, v3, s[0:3], 0 offen offset:8
+; UNROLL3-NEXT:    buffer_load_dword v7, v3, s[0:3], 0 offen offset:12
+; UNROLL3-NEXT:    buffer_load_dword v8, v3, s[0:3], 0 offen offset:16
+; UNROLL3-NEXT:    buffer_load_dword v9, v3, s[0:3], 0 offen offset:20
+; UNROLL3-NEXT:    buffer_load_dword v10, v3, s[0:3], 0 offen offset:24
+; UNROLL3-NEXT:    buffer_load_dword v11, v3, s[0:3], 0 offen offset:28
+; UNROLL3-NEXT:    buffer_load_dword v12, v3, s[0:3], 0 offen offset:32
+; UNROLL3-NEXT:    buffer_load_dword v13, v3, s[0:3], 0 offen offset:36
+; UNROLL3-NEXT:    buffer_load_dword v14, v3, s[0:3], 0 offen offset:40
+; UNROLL3-NEXT:    buffer_load_dword v15, v3, s[0:3], 0 offen offset:44
+; UNROLL3-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 48
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, 0
+; UNROLL3-NEXT:    v_add_nc_u32_e32 v3, 48, v3
+; UNROLL3-NEXT:    v_cmp_gt_u64_e64 s6, 0x7e0, s[4:5]
+; UNROLL3-NEXT:    s_waitcnt vmcnt(4)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[8:11] offset:16
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[4:7]
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[12:15] offset:32
+; UNROLL3-NEXT:    s_and_b32 vcc_lo, exec_lo, s6
+; UNROLL3-NEXT:    s_cbranch_vccnz .LBB4_1
+; UNROLL3-NEXT:  ; %bb.2: ; %memcpy-split
+; UNROLL3-NEXT:    s_inst_prefetch 0x2
+; UNROLL3-NEXT:    s_clause 0x3
+; UNROLL3-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:2016
+; UNROLL3-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:2020
+; UNROLL3-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:2024
+; UNROLL3-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:2028
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:2016
+; UNROLL3-NEXT:    s_clause 0x3
+; UNROLL3-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:2032
+; UNROLL3-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:2036
+; UNROLL3-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:2040
+; UNROLL3-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:2044
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:2032
+; UNROLL3-NEXT:    s_waitcnt lgkmcnt(0)
+; UNROLL3-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 2048, i1 false)
+  ret void
+}
+
+
+; memmove for address spaces 0, 1, 4, 5
+
+define void @memmove_p0_p0_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(0) align 1 readonly %src) {
+; CHECK-LABEL: memmove_p0_p0_sz2048:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s4, exec_lo
+; CHECK-NEXT:    v_cmpx_ge_u64_e64 v[2:3], v[0:1]
+; CHECK-NEXT:    s_xor_b32 s6, exec_lo, s4
+; CHECK-NEXT:    s_cbranch_execz .LBB5_3
+; CHECK-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:  .LBB5_2: ; %memmove_fwd_loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    v_add_co_u32 v96, vcc_lo, v2, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo
+; CHECK-NEXT:    v_add_co_u32 v100, vcc_lo, v0, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo
+; CHECK-NEXT:    s_clause 0xf
+; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[96:97] offset:224
+; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[96:97] offset:240
+; CHECK-NEXT:    flat_load_dwordx4 v[12:15], v[96:97] offset:192
+; CHECK-NEXT:    flat_load_dwordx4 v[16:19], v[96:97] offset:208
+; CHECK-NEXT:    flat_load_dwordx4 v[20:23], v[96:97] offset:160
+; CHECK-NEXT:    flat_load_dwordx4 v[24:27], v[96:97] offset:176
+; CHECK-NEXT:    flat_load_dwordx4 v[28:31], v[96:97] offset:128
+; CHECK-NEXT:    flat_load_dwordx4 v[32:35], v[96:97] offset:144
+; CHECK-NEXT:    flat_load_dwordx4 v[36:39], v[96:97] offset:96
+; CHECK-NEXT:    flat_load_dwordx4 v[48:51], v[96:97] offset:112
+; CHECK-NEXT:    flat_load_dwordx4 v[52:55], v[96:97] offset:64
+; CHECK-NEXT:    flat_load_dwordx4 v[64:67], v[96:97] offset:80
+; CHECK-NEXT:    flat_load_dwordx4 v[68:71], v[96:97] offset:32
+; CHECK-NEXT:    flat_load_dwordx4 v[80:83], v[96:97] offset:48
+; CHECK-NEXT:    flat_load_dwordx4 v[84:87], v[96:97]
+; CHECK-NEXT:    flat_load_dwordx4 v[96:99], v[96:97] offset:16
+; CHECK-NEXT:    s_add_u32 s4, s4, 0x100
+; CHECK-NEXT:    s_addc_u32 s5, s5, 0
+; CHECK-NEXT:    s_waitcnt vmcnt(15) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[4:7] offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(14) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[8:11] offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(13) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[12:15] offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(12) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[16:19] offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(11) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[20:23] offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(10) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[24:27] offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(9) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[28:31] offset:128
+; CHECK-NEXT:    s_waitcnt vmcnt(8) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[32:35] offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(7) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[36:39] offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(6) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[48:51] offset:112
+; CHECK-NEXT:    s_waitcnt vmcnt(5) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[52:55] offset:64
+; CHECK-NEXT:    s_waitcnt vmcnt(4) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[64:67] offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[68:71] offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[80:83] offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[84:87]
+; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[96:99] offset:16
+; CHECK-NEXT:    s_cmp_lg_u64 s[4:5], 0x800
+; CHECK-NEXT:    s_cbranch_scc1 .LBB5_2
+; CHECK-NEXT:  .LBB5_3: ; %Flow5
+; CHECK-NEXT:    s_andn2_saveexec_b32 s8, s6
+; CHECK-NEXT:    s_cbranch_execz .LBB5_6
+; CHECK-NEXT:  ; %bb.4: ; %memmove_bwd_loop.preheader
+; CHECK-NEXT:    s_movk_i32 s6, 0xff00
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0x700
+; CHECK-NEXT:    s_mov_b32 s7, -1
+; CHECK-NEXT:  .LBB5_5: ; %memmove_bwd_loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    v_add_co_u32 v96, vcc_lo, v2, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo
+; CHECK-NEXT:    v_add_co_u32 v100, vcc_lo, v0, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo
+; CHECK-NEXT:    s_clause 0xf
+; CHECK-NEXT:    flat_load_dwordx4 v[4:7], v[96:97] offset:224
+; CHECK-NEXT:    flat_load_dwordx4 v[8:11], v[96:97] offset:240
+; CHECK-NEXT:    flat_load_dwordx4 v[12:15], v[96:97] offset:192
+; CHECK-NEXT:    flat_load_dwordx4 v[16:19], v[96:97] offset:208
+; CHECK-NEXT:    flat_load_dwordx4 v[20:23], v[96:97] offset:160
+; CHECK-NEXT:    flat_load_dwordx4 v[24:27], v[96:97] offset:176
+; CHECK-NEXT:    flat_load_dwordx4 v[28:31], v[96:97] offset:128
+; CHECK-NEXT:    flat_load_dwordx4 v[32:35], v[96:97] offset:144
+; CHECK-NEXT:    flat_load_dwordx4 v[36:39], v[96:97] offset:96
+; CHECK-NEXT:    flat_load_dwordx4 v[48:51], v[96:97] offset:112
+; CHECK-NEXT:    flat_load_dwordx4 v[52:55], v[96:97] offset:64
+; CHECK-NEXT:    flat_load_dwordx4 v[64:67], v[96:97] offset:80
+; CHECK-NEXT:    flat_load_dwordx4 v[68:71], v[96:97] offset:32
+; CHECK-NEXT:    flat_load_dwordx4 v[80:83], v[96:97] offset:48
+; CHECK-NEXT:    flat_load_dwordx4 v[84:87], v[96:97]
+; CHECK-NEXT:    flat_load_dwordx4 v[96:99], v[96:97] offset:16
+; CHECK-NEXT:    s_add_u32 s4, s4, 0xffffff00
+; CHECK-NEXT:    s_addc_u32 s5, s5, -1
+; CHECK-NEXT:    s_waitcnt vmcnt(15) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[4:7] offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(14) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[8:11] offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(13) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[12:15] offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(12) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[16:19] offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(11) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[20:23] offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(10) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[24:27] offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(9) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[28:31] offset:128
+; CHECK-NEXT:    s_waitcnt vmcnt(8) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[32:35] offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(7) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[36:39] offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(6) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[48:51] offset:112
+; CHECK-NEXT:    s_waitcnt vmcnt(5) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[52:55] offset:64
+; CHECK-NEXT:    s_waitcnt vmcnt(4) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[64:67] offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(3) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[68:71] offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[80:83] offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[84:87]
+; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[96:99] offset:16
+; CHECK-NEXT:    s_cmp_eq_u64 s[4:5], s[6:7]
+; CHECK-NEXT:    s_cbranch_scc0 .LBB5_5
+; CHECK-NEXT:  .LBB5_6: ; %Flow6
+; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+;
+; ALIGNED-LABEL: memmove_p0_p0_sz2048:
+; ALIGNED:       ; %bb.0: ; %entry
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_mov_b32 s4, exec_lo
+; ALIGNED-NEXT:    v_cmpx_ge_u64_e64 v[2:3], v[0:1]
+; ALIGNED-NEXT:    s_xor_b32 s6, exec_lo, s4
+; ALIGNED-NEXT:    s_cbranch_execz .LBB5_3
+; ALIGNED-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0
+; ALIGNED-NEXT:  .LBB5_2: ; %memmove_fwd_loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    v_add_co_u32 v20, vcc_lo, v2, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, s5, v3, vcc_lo
+; ALIGNED-NEXT:    s_clause 0xf
+; ALIGNED-NEXT:    flat_load_dwordx4 v[16:19], v[20:21] offset:240
+; ALIGNED-NEXT:    flat_load_dwordx4 v[22:25], v[20:21] offset:224
+; ALIGNED-NEXT:    flat_load_dwordx4 v[4:7], v[20:21]
+; ALIGNED-NEXT:    flat_load_dwordx4 v[8:11], v[20:21] offset:16
+; ALIGNED-NEXT:    flat_load_dwordx4 v[12:15], v[20:21] offset:32
+; ALIGNED-NEXT:    flat_load_dwordx4 v[98:101], v[20:21] offset:48
+; ALIGNED-NEXT:    flat_load_dwordx4 v[112:115], v[20:21] offset:64
+; ALIGNED-NEXT:    flat_load_dwordx4 v[82:85], v[20:21] offset:80
+; ALIGNED-NEXT:    flat_load_dwordx4 v[116:119], v[20:21] offset:96
+; ALIGNED-NEXT:    flat_load_dwordx4 v[66:69], v[20:21] offset:112
+; ALIGNED-NEXT:    flat_load_dwordx4 v[40:43], v[20:21] offset:128
+; ALIGNED-NEXT:    flat_load_dwordx4 v[50:53], v[20:21] offset:144
+; ALIGNED-NEXT:    flat_load_dwordx4 v[44:47], v[20:21] offset:160
+; ALIGNED-NEXT:    flat_load_dwordx4 v[34:37], v[20:21] offset:176
+; ALIGNED-NEXT:    flat_load_dwordx4 v[30:33], v[20:21] offset:192
+; ALIGNED-NEXT:    flat_load_dwordx4 v[26:29], v[20:21] offset:208
+; ALIGNED-NEXT:    s_waitcnt vmcnt(15) lgkmcnt(15)
+; ALIGNED-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0x100
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, 0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v21 offset:254
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v21 offset:252
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v20 offset:250
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v20 offset:248
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v19 offset:246
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v19 offset:244
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v18 offset:242
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v18 offset:240
+; ALIGNED-NEXT:    s_waitcnt lgkmcnt(22)
+; ALIGNED-NEXT:    buffer_store_dword v22, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    buffer_store_dword v23, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_store_dword v24, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_store_dword v25, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v21, 8, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v20
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v20, 8, v20
+; ALIGNED-NEXT:    s_cmp_lg_u64 s[4:5], 0x800
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v25 offset:238
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v25 offset:236
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v24 offset:234
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v24 offset:232
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v23 offset:230
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v23 offset:228
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v22 offset:226
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v22 offset:224
+; ALIGNED-NEXT:    s_waitcnt lgkmcnt(16)
+; ALIGNED-NEXT:    buffer_store_dword v26, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    buffer_store_dword v27, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_store_dword v28, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_store_dword v29, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v29, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v29 offset:222
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v29 offset:220
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v28 offset:218
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v28 offset:216
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v27 offset:214
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v27 offset:212
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v26 offset:210
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v26 offset:208
+; ALIGNED-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v33 offset:206
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v33 offset:204
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v32 offset:202
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v32 offset:200
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v31 offset:198
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:196
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v30 offset:194
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:192
+; ALIGNED-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v37 offset:190
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v37 offset:188
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v36 offset:186
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v36 offset:184
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v35 offset:182
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v35 offset:180
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v34 offset:178
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v34 offset:176
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v49 offset:174
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v49 offset:172
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v48 offset:170
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v48 offset:168
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v39 offset:166
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v39 offset:164
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v38 offset:162
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v38 offset:160
+; ALIGNED-NEXT:    buffer_store_dword v50, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    buffer_store_dword v51, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v53 offset:158
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v53 offset:156
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v52 offset:154
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v52 offset:152
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v51 offset:150
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v51 offset:148
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v50 offset:146
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v50 offset:144
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v65, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    buffer_load_dword v64, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_load_dword v55, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_load_dword v54, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v65 offset:142
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v65 offset:140
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v64 offset:138
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v64 offset:136
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v55 offset:134
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v55 offset:132
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v54 offset:130
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v54 offset:128
+; ALIGNED-NEXT:    buffer_store_dword v66, off, s[0:3], s32 offset:192
+; ALIGNED-NEXT:    buffer_store_dword v67, off, s[0:3], s32 offset:196
+; ALIGNED-NEXT:    buffer_store_dword v68, off, s[0:3], s32 offset:200
+; ALIGNED-NEXT:    buffer_store_dword v69, off, s[0:3], s32 offset:204
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v69, off, s[0:3], s32 offset:204
+; ALIGNED-NEXT:    buffer_load_dword v68, off, s[0:3], s32 offset:200
+; ALIGNED-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:196
+; ALIGNED-NEXT:    buffer_load_dword v66, off, s[0:3], s32 offset:192
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v69 offset:126
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v69 offset:124
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v68 offset:122
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v68 offset:120
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v67 offset:118
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v67 offset:116
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v66 offset:114
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v66 offset:112
+; ALIGNED-NEXT:    buffer_store_dword v116, off, s[0:3], s32 offset:208
+; ALIGNED-NEXT:    buffer_store_dword v117, off, s[0:3], s32 offset:212
+; ALIGNED-NEXT:    buffer_store_dword v118, off, s[0:3], s32 offset:216
+; ALIGNED-NEXT:    buffer_store_dword v119, off, s[0:3], s32 offset:220
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v81, off, s[0:3], s32 offset:220
+; ALIGNED-NEXT:    buffer_load_dword v80, off, s[0:3], s32 offset:216
+; ALIGNED-NEXT:    buffer_load_dword v71, off, s[0:3], s32 offset:212
+; ALIGNED-NEXT:    buffer_load_dword v70, off, s[0:3], s32 offset:208
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v81 offset:110
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v81 offset:108
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v80 offset:106
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v80 offset:104
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v71 offset:102
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v71 offset:100
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v70 offset:98
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v70 offset:96
+; ALIGNED-NEXT:    buffer_store_dword v82, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    buffer_store_dword v83, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_store_dword v84, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_store_dword v85, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v85, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    buffer_load_dword v84, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_load_dword v83, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_load_dword v82, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v85 offset:94
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v85 offset:92
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v84 offset:90
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v84 offset:88
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v83 offset:86
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v83 offset:84
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v82 offset:82
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v82 offset:80
+; ALIGNED-NEXT:    buffer_store_dword v112, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    buffer_store_dword v113, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_store_dword v114, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_store_dword v115, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v97, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    buffer_load_dword v96, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_load_dword v87, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_load_dword v86, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v19, 8, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v18, 8, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 8, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 8, v24
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v97 offset:78
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v97 offset:76
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v96 offset:74
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v96 offset:72
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v87 offset:70
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v87 offset:68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v86 offset:66
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v86 offset:64
+; ALIGNED-NEXT:    buffer_store_dword v98, off, s[0:3], s32 offset:256
+; ALIGNED-NEXT:    buffer_store_dword v99, off, s[0:3], s32 offset:260
+; ALIGNED-NEXT:    buffer_store_dword v100, off, s[0:3], s32 offset:264
+; ALIGNED-NEXT:    buffer_store_dword v101, off, s[0:3], s32 offset:268
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v101, off, s[0:3], s32 offset:268
+; ALIGNED-NEXT:    buffer_load_dword v100, off, s[0:3], s32 offset:264
+; ALIGNED-NEXT:    buffer_load_dword v99, off, s[0:3], s32 offset:260
+; ALIGNED-NEXT:    buffer_load_dword v98, off, s[0:3], s32 offset:256
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v101 offset:62
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v101 offset:60
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v100 offset:58
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v100 offset:56
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v99 offset:54
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v99 offset:52
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v98 offset:50
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v98 offset:48
+; ALIGNED-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:272
+; ALIGNED-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:276
+; ALIGNED-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:280
+; ALIGNED-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:284
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:280
+; ALIGNED-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:284
+; ALIGNED-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:272
+; ALIGNED-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:276
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v15 offset:42
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v15 offset:40
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v14 offset:46
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v14 offset:44
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v13 offset:34
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v13 offset:32
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v12 offset:38
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v12 offset:36
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:224
+; ALIGNED-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:228
+; ALIGNED-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:232
+; ALIGNED-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:236
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:236
+; ALIGNED-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:232
+; ALIGNED-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:228
+; ALIGNED-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:224
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v11 offset:30
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v11 offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v10 offset:26
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v10 offset:24
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v9 offset:22
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v9 offset:20
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v8 offset:18
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v8 offset:16
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:240
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:244
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:248
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:252
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:252
+; ALIGNED-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:248
+; ALIGNED-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:244
+; ALIGNED-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:240
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v112 offset:247
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v27
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v102 offset:255
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v23
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v23, 8, v23
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v21 offset:253
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v21, 24, v22
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v22, 8, v22
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v103 offset:251
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v29
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 8, v29
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v20 offset:249
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v20, 24, v28
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 8, v28
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v27, 8, v27
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v19 offset:245
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v19, 24, v26
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v26, 8, v26
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v112 offset:215
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v51
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 8, v51
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v113 offset:243
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v33
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v33, 8, v33
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v18 offset:241
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v18, 24, v32
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v32, 8, v32
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v114 offset:239
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v31
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 8, v31
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v25 offset:237
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 24, v30
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 8, v30
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v115 offset:235
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v37
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v37, 8, v37
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v24 offset:233
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v36
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 8, v36
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v102 offset:231
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v35
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v35, 8, v35
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v23 offset:229
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v23, 24, v34
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v34, 8, v34
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v21 offset:227
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v21, 24, v49
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 8, v49
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v22 offset:225
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v22, 24, v48
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 8, v48
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v103 offset:223
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v39
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v39, 8, v39
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v29 offset:221
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 24, v38
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v38, 8, v38
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v20 offset:219
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v20, 24, v53
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 8, v53
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v28 offset:217
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 24, v52
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 8, v52
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v27 offset:213
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v27, 24, v50
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 8, v50
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v19 offset:211
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v19, 24, v65
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 8, v65
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v26 offset:209
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v26, 24, v64
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v51 offset:149
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 24, v8
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v8, 8, v8
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 8, v64
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v113 offset:207
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v55
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v55, 8, v55
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v33 offset:205
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v33, 24, v54
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v54, 8, v54
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v18 offset:203
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v18, 24, v69
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 8, v69
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v32 offset:201
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v32, 24, v68
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 8, v68
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v114 offset:199
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v67
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 8, v67
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:197
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v66
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 8, v66
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v25 offset:195
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 24, v81
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v81
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:193
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v80
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 8, v80
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v115 offset:191
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v71
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 8, v71
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v37 offset:189
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v37, 24, v70
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 8, v70
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v24 offset:187
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v85
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v85
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v36 offset:185
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v84
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 8, v84
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v102 offset:183
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v83
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v83
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v35 offset:181
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v35, 24, v82
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 8, v82
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v23 offset:179
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v23, 24, v97
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v97, 8, v97
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v34 offset:177
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v34, 24, v96
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v96, 8, v96
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v21 offset:175
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v21, 24, v87
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v87
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v49 offset:173
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 24, v86
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 8, v86
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v22 offset:171
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v22, 24, v101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 8, v101
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v48 offset:169
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 24, v100
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v100
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v103 offset:167
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 8, v99
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v39 offset:165
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v39, 24, v98
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v98
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v29 offset:163
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 24, v15
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v15, 8, v15
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v38 offset:161
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v38, 24, v14
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v14, 8, v14
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v20 offset:159
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v20, 24, v13
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v13, 8, v13
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v53 offset:157
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 24, v12
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v12, 8, v12
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v28 offset:155
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 24, v11
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v11, 8, v11
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v52 offset:153
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 24, v10
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v10, 8, v10
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v112 offset:151
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v9
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v9, 8, v9
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v27 offset:147
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v50 offset:145
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v19 offset:143
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v65 offset:141
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v26 offset:139
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v64 offset:137
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v113 offset:135
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v55 offset:133
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v33 offset:131
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v54 offset:129
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v18 offset:127
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v69 offset:125
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v32 offset:123
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v68 offset:121
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v114 offset:119
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v67 offset:117
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:115
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v66 offset:113
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v25 offset:111
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v81 offset:109
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:107
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v80 offset:105
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v115 offset:103
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v71 offset:101
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v37 offset:99
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v70 offset:97
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v24 offset:95
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v85 offset:93
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v36 offset:91
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v84 offset:89
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v102 offset:87
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v83 offset:85
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v35 offset:83
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v82 offset:81
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v23 offset:79
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v97 offset:77
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v34 offset:75
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v96 offset:73
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v21 offset:71
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v87 offset:69
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v49 offset:67
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v86 offset:65
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v22 offset:63
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v101 offset:61
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v48 offset:59
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v100 offset:57
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v103 offset:55
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v99 offset:53
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v39 offset:51
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v98 offset:49
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v29 offset:43
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v15 offset:41
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v38 offset:47
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v14 offset:45
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v20 offset:35
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v13 offset:33
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v53 offset:39
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v12 offset:37
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v28 offset:31
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v11 offset:29
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v52 offset:27
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v10 offset:25
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v112 offset:23
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v9 offset:21
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v51 offset:19
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v8 offset:17
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v7 offset:14
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v7 offset:12
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v6 offset:10
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v6 offset:8
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v5 offset:6
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v5 offset:4
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v4 offset:2
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v4
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v8, 24, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v9, 24, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v6, 8, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v10, 24, v5
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v5, 8, v5
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v11, 24, v4
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v4, 8, v4
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v8 offset:15
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v7 offset:13
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v9 offset:11
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v6 offset:9
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v10 offset:7
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v5 offset:5
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v11 offset:3
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v4 offset:1
+; ALIGNED-NEXT:    s_cbranch_scc1 .LBB5_2
+; ALIGNED-NEXT:  .LBB5_3: ; %Flow5
+; ALIGNED-NEXT:    s_andn2_saveexec_b32 s8, s6
+; ALIGNED-NEXT:    s_cbranch_execz .LBB5_6
+; ALIGNED-NEXT:  ; %bb.4: ; %memmove_bwd_loop.preheader
+; ALIGNED-NEXT:    s_movk_i32 s6, 0xff00
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0x700
+; ALIGNED-NEXT:    s_mov_b32 s7, -1
+; ALIGNED-NEXT:  .LBB5_5: ; %memmove_bwd_loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    v_add_co_u32 v24, vcc_lo, v2, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v25, vcc_lo, s5, v3, vcc_lo
+; ALIGNED-NEXT:    s_clause 0xf
+; ALIGNED-NEXT:    flat_load_dwordx4 v[16:19], v[24:25] offset:240
+; ALIGNED-NEXT:    flat_load_dwordx4 v[20:23], v[24:25] offset:224
+; ALIGNED-NEXT:    flat_load_dwordx4 v[4:7], v[24:25]
+; ALIGNED-NEXT:    flat_load_dwordx4 v[8:11], v[24:25] offset:16
+; ALIGNED-NEXT:    flat_load_dwordx4 v[12:15], v[24:25] offset:32
+; ALIGNED-NEXT:    flat_load_dwordx4 v[112:115], v[24:25] offset:48
+; ALIGNED-NEXT:    flat_load_dwordx4 v[116:119], v[24:25] offset:64
+; ALIGNED-NEXT:    flat_load_dwordx4 v[40:43], v[24:25] offset:80
+; ALIGNED-NEXT:    flat_load_dwordx4 v[26:29], v[24:25] offset:96
+; ALIGNED-NEXT:    flat_load_dwordx4 v[32:35], v[24:25] offset:112
+; ALIGNED-NEXT:    flat_load_dwordx4 v[44:47], v[24:25] offset:128
+; ALIGNED-NEXT:    flat_load_dwordx4 v[52:55], v[24:25] offset:144
+; ALIGNED-NEXT:    flat_load_dwordx4 v[66:69], v[24:25] offset:160
+; ALIGNED-NEXT:    flat_load_dwordx4 v[81:84], v[24:25] offset:176
+; ALIGNED-NEXT:    flat_load_dwordx4 v[96:99], v[24:25] offset:192
+; ALIGNED-NEXT:    flat_load_dwordx4 v[100:103], v[24:25] offset:208
+; ALIGNED-NEXT:    s_waitcnt vmcnt(15) lgkmcnt(15)
+; ALIGNED-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:320
+; ALIGNED-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:324
+; ALIGNED-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:328
+; ALIGNED-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:332
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:332
+; ALIGNED-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:328
+; ALIGNED-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:324
+; ALIGNED-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:320
+; ALIGNED-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0xffffff00
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, -1
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v31 offset:254
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:252
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v30 offset:250
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:248
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v25 offset:246
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v25 offset:244
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v24 offset:242
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v24 offset:240
+; ALIGNED-NEXT:    s_waitcnt lgkmcnt(22)
+; ALIGNED-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:336
+; ALIGNED-NEXT:    buffer_store_dword v21, off, s[0:3], s32 offset:340
+; ALIGNED-NEXT:    buffer_store_dword v22, off, s[0:3], s32 offset:344
+; ALIGNED-NEXT:    buffer_store_dword v23, off, s[0:3], s32 offset:348
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:348
+; ALIGNED-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:344
+; ALIGNED-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:340
+; ALIGNED-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:336
+; ALIGNED-NEXT:    s_cmp_eq_u64 s[4:5], s[6:7]
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v51 offset:238
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v51 offset:236
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v50 offset:234
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v50 offset:232
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v49 offset:230
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v49 offset:228
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v36 offset:226
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v36 offset:224
+; ALIGNED-NEXT:    s_waitcnt lgkmcnt(16)
+; ALIGNED-NEXT:    buffer_store_dword v100, off, s[0:3], s32 offset:288
+; ALIGNED-NEXT:    buffer_store_dword v101, off, s[0:3], s32 offset:292
+; ALIGNED-NEXT:    buffer_store_dword v102, off, s[0:3], s32 offset:296
+; ALIGNED-NEXT:    buffer_store_dword v103, off, s[0:3], s32 offset:300
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v71, off, s[0:3], s32 offset:300
+; ALIGNED-NEXT:    buffer_load_dword v70, off, s[0:3], s32 offset:296
+; ALIGNED-NEXT:    buffer_load_dword v65, off, s[0:3], s32 offset:292
+; ALIGNED-NEXT:    buffer_load_dword v64, off, s[0:3], s32 offset:288
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v31
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 8, v31
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v30
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 8, v30
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v71 offset:222
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v71 offset:220
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v70 offset:218
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v70 offset:216
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v65 offset:214
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v65 offset:212
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v64 offset:210
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v64 offset:208
+; ALIGNED-NEXT:    buffer_store_dword v96, off, s[0:3], s32 offset:304
+; ALIGNED-NEXT:    buffer_store_dword v97, off, s[0:3], s32 offset:308
+; ALIGNED-NEXT:    buffer_store_dword v98, off, s[0:3], s32 offset:312
+; ALIGNED-NEXT:    buffer_store_dword v99, off, s[0:3], s32 offset:316
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v87, off, s[0:3], s32 offset:316
+; ALIGNED-NEXT:    buffer_load_dword v86, off, s[0:3], s32 offset:312
+; ALIGNED-NEXT:    buffer_load_dword v85, off, s[0:3], s32 offset:308
+; ALIGNED-NEXT:    buffer_load_dword v80, off, s[0:3], s32 offset:304
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v87 offset:206
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v87 offset:204
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v86 offset:202
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v86 offset:200
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v85 offset:198
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v85 offset:196
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v80 offset:194
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v80 offset:192
+; ALIGNED-NEXT:    buffer_store_dword v81, off, s[0:3], s32 offset:384
+; ALIGNED-NEXT:    buffer_store_dword v82, off, s[0:3], s32 offset:388
+; ALIGNED-NEXT:    buffer_store_dword v83, off, s[0:3], s32 offset:392
+; ALIGNED-NEXT:    buffer_store_dword v84, off, s[0:3], s32 offset:396
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v101, off, s[0:3], s32 offset:396
+; ALIGNED-NEXT:    buffer_load_dword v99, off, s[0:3], s32 offset:392
+; ALIGNED-NEXT:    buffer_load_dword v96, off, s[0:3], s32 offset:388
+; ALIGNED-NEXT:    buffer_load_dword v81, off, s[0:3], s32 offset:384
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v101 offset:190
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v101 offset:188
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v99 offset:186
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v99 offset:184
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v96 offset:182
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v96 offset:180
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v81 offset:178
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v81 offset:176
+; ALIGNED-NEXT:    buffer_store_dword v66, off, s[0:3], s32 offset:400
+; ALIGNED-NEXT:    buffer_store_dword v67, off, s[0:3], s32 offset:404
+; ALIGNED-NEXT:    buffer_store_dword v68, off, s[0:3], s32 offset:408
+; ALIGNED-NEXT:    buffer_store_dword v69, off, s[0:3], s32 offset:412
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v100, off, s[0:3], s32 offset:412
+; ALIGNED-NEXT:    buffer_load_dword v97, off, s[0:3], s32 offset:408
+; ALIGNED-NEXT:    buffer_load_dword v82, off, s[0:3], s32 offset:404
+; ALIGNED-NEXT:    buffer_load_dword v66, off, s[0:3], s32 offset:400
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v100 offset:174
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v100 offset:172
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v97 offset:170
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v97 offset:168
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v82 offset:166
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v82 offset:164
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v66 offset:162
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v66 offset:160
+; ALIGNED-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:352
+; ALIGNED-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:356
+; ALIGNED-NEXT:    buffer_store_dword v54, off, s[0:3], s32 offset:360
+; ALIGNED-NEXT:    buffer_store_dword v55, off, s[0:3], s32 offset:364
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v98, off, s[0:3], s32 offset:364
+; ALIGNED-NEXT:    buffer_load_dword v83, off, s[0:3], s32 offset:360
+; ALIGNED-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:356
+; ALIGNED-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:352
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v98 offset:158
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v98 offset:156
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v83 offset:154
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v83 offset:152
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v67 offset:150
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v67 offset:148
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v52 offset:146
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v52 offset:144
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:368
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:372
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:376
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:380
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v84, off, s[0:3], s32 offset:380
+; ALIGNED-NEXT:    buffer_load_dword v68, off, s[0:3], s32 offset:376
+; ALIGNED-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:372
+; ALIGNED-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:368
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v84 offset:142
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v84 offset:140
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v68 offset:138
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v68 offset:136
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v53 offset:134
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v53 offset:132
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v37 offset:130
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v37 offset:128
+; ALIGNED-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:448
+; ALIGNED-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:452
+; ALIGNED-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:456
+; ALIGNED-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:460
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v69, off, s[0:3], s32 offset:460
+; ALIGNED-NEXT:    buffer_load_dword v54, off, s[0:3], s32 offset:456
+; ALIGNED-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:452
+; ALIGNED-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:448
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v69 offset:126
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v69 offset:124
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v54 offset:122
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v54 offset:120
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v38 offset:118
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v38 offset:116
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v32 offset:114
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v32 offset:112
+; ALIGNED-NEXT:    buffer_store_dword v26, off, s[0:3], s32 offset:464
+; ALIGNED-NEXT:    buffer_store_dword v27, off, s[0:3], s32 offset:468
+; ALIGNED-NEXT:    buffer_store_dword v28, off, s[0:3], s32 offset:472
+; ALIGNED-NEXT:    buffer_store_dword v29, off, s[0:3], s32 offset:476
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v55, off, s[0:3], s32 offset:476
+; ALIGNED-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:472
+; ALIGNED-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:468
+; ALIGNED-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:464
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v55 offset:110
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v55 offset:108
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v39 offset:106
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v39 offset:104
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v33 offset:102
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v33 offset:100
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v26 offset:98
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v26 offset:96
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:416
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:420
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:424
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:428
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:428
+; ALIGNED-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:424
+; ALIGNED-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:420
+; ALIGNED-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:416
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v48 offset:94
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v48 offset:92
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v34 offset:90
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v34 offset:88
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v27 offset:86
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v27 offset:84
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v21 offset:82
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v21 offset:80
+; ALIGNED-NEXT:    buffer_store_dword v116, off, s[0:3], s32 offset:432
+; ALIGNED-NEXT:    buffer_store_dword v117, off, s[0:3], s32 offset:436
+; ALIGNED-NEXT:    buffer_store_dword v118, off, s[0:3], s32 offset:440
+; ALIGNED-NEXT:    buffer_store_dword v119, off, s[0:3], s32 offset:444
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:444
+; ALIGNED-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:440
+; ALIGNED-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:436
+; ALIGNED-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:432
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v35 offset:78
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v35 offset:76
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v28 offset:74
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v28 offset:72
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v22 offset:70
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v22 offset:68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v19 offset:66
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v19 offset:64
+; ALIGNED-NEXT:    buffer_store_dword v112, off, s[0:3], s32 offset:512
+; ALIGNED-NEXT:    buffer_store_dword v113, off, s[0:3], s32 offset:516
+; ALIGNED-NEXT:    buffer_store_dword v114, off, s[0:3], s32 offset:520
+; ALIGNED-NEXT:    buffer_store_dword v115, off, s[0:3], s32 offset:524
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v29, off, s[0:3], s32 offset:524
+; ALIGNED-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:520
+; ALIGNED-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:516
+; ALIGNED-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:512
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 8, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 8, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v51
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 8, v51
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v50
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 8, v50
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v29 offset:62
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v29 offset:60
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v23 offset:58
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v23 offset:56
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v20 offset:54
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v20 offset:52
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v18 offset:50
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v18 offset:48
+; ALIGNED-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:528
+; ALIGNED-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:532
+; ALIGNED-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:536
+; ALIGNED-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:540
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:536
+; ALIGNED-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:540
+; ALIGNED-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:528
+; ALIGNED-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:532
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v15 offset:42
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v15 offset:40
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v14 offset:46
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v14 offset:44
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v13 offset:34
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v13 offset:32
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v12 offset:38
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v12 offset:36
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:480
+; ALIGNED-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:484
+; ALIGNED-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:488
+; ALIGNED-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:492
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:492
+; ALIGNED-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:488
+; ALIGNED-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:484
+; ALIGNED-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:480
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v11 offset:30
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v11 offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v10 offset:26
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v10 offset:24
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v9 offset:22
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v9 offset:20
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v8 offset:18
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v8 offset:16
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:496
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:500
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:504
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:508
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:508
+; ALIGNED-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:504
+; ALIGNED-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:500
+; ALIGNED-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:496
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v112 offset:247
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v65
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v102 offset:255
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v49
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 8, v49
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:253
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v36
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 8, v36
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v103 offset:251
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v71
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 8, v71
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:249
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v70
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 8, v70
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 8, v65
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v25 offset:245
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 24, v64
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 8, v64
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v112 offset:215
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v67
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 8, v67
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v113 offset:243
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v87
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v87
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v24 offset:241
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v86
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 8, v86
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v114 offset:239
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v85
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v85
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v51 offset:237
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 24, v80
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 8, v80
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v115 offset:235
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 8, v101
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v50 offset:233
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 24, v99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 8, v99
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v102 offset:231
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v96
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v96, 8, v96
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v49 offset:229
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 24, v81
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v81
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:227
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v100
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v100
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v36 offset:225
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v97
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v97, 8, v97
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v103 offset:223
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v82
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 8, v82
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v71 offset:221
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 24, v66
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 8, v66
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:219
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v98
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v98
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v70 offset:217
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v83
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v83
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v65 offset:213
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 24, v52
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 8, v52
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v25 offset:211
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 24, v84
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 8, v84
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v64 offset:209
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 24, v68
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v67 offset:149
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 24, v8
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v8, 8, v8
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 8, v68
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v113 offset:207
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v53
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 8, v53
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v87 offset:205
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 24, v37
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v37, 8, v37
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v24 offset:203
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v69
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 8, v69
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v86 offset:201
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v54
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v54, 8, v54
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v114 offset:199
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v38
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v38, 8, v38
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v85 offset:197
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 24, v32
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v32, 8, v32
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v51 offset:195
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 24, v55
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v55, 8, v55
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v80 offset:193
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v39
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v39, 8, v39
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v115 offset:191
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v33
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v33, 8, v33
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v101 offset:189
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v26
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v26, 8, v26
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v50 offset:187
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 24, v48
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 8, v48
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v99 offset:185
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v34
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v34, 8, v34
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v102 offset:183
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v27
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v27, 8, v27
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v96 offset:181
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v96, 24, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v21, 8, v21
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v49 offset:179
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 24, v35
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v35, 8, v35
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v81 offset:177
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 24, v28
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 8, v28
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:175
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v22
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v22, 8, v22
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v100 offset:173
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 24, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v19, 8, v19
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v36 offset:171
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v29
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 8, v29
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v97 offset:169
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v97, 24, v23
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v23, 8, v23
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v103 offset:167
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v20
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v20, 8, v20
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v82 offset:165
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v18, 8, v18
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v71 offset:163
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 24, v15
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v15, 8, v15
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v66 offset:161
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 24, v14
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v14, 8, v14
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:159
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v13
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v13, 8, v13
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v98 offset:157
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 24, v12
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v12, 8, v12
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v70 offset:155
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v11
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v11, 8, v11
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v83 offset:153
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 24, v10
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v10, 8, v10
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v112 offset:151
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v9
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v9, 8, v9
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v65 offset:147
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v52 offset:145
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v25 offset:143
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v84 offset:141
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v64 offset:139
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v68 offset:137
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v113 offset:135
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v53 offset:133
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v87 offset:131
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v37 offset:129
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v24 offset:127
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v69 offset:125
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v86 offset:123
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v54 offset:121
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v114 offset:119
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v38 offset:117
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v85 offset:115
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v32 offset:113
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v51 offset:111
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v55 offset:109
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v80 offset:107
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v39 offset:105
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v115 offset:103
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v33 offset:101
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v101 offset:99
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v26 offset:97
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v50 offset:95
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v48 offset:93
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v99 offset:91
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v34 offset:89
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v102 offset:87
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v27 offset:85
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v96 offset:83
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v21 offset:81
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v49 offset:79
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v35 offset:77
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v81 offset:75
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v28 offset:73
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v31 offset:71
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v22 offset:69
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v100 offset:67
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v19 offset:65
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v36 offset:63
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v29 offset:61
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v97 offset:59
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v23 offset:57
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v103 offset:55
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v20 offset:53
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v82 offset:51
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v18 offset:49
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v71 offset:43
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v15 offset:41
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v66 offset:47
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v14 offset:45
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v30 offset:35
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v13 offset:33
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v98 offset:39
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v12 offset:37
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v70 offset:31
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v11 offset:29
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v83 offset:27
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v10 offset:25
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v112 offset:23
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v9 offset:21
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v67 offset:19
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v8 offset:17
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v7 offset:14
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v7 offset:12
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v6 offset:10
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v6 offset:8
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v5 offset:6
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v5 offset:4
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[16:17], v4 offset:2
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v4
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v8, 24, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v9, 24, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v6, 8, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v10, 24, v5
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v5, 8, v5
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v11, 24, v4
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v4, 8, v4
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v8 offset:15
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v7 offset:13
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v9 offset:11
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v6 offset:9
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v10 offset:7
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v5 offset:5
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v11 offset:3
+; ALIGNED-NEXT:    flat_store_byte v[16:17], v4 offset:1
+; ALIGNED-NEXT:    s_cbranch_scc0 .LBB5_5
+; ALIGNED-NEXT:  .LBB5_6: ; %Flow6
+; ALIGNED-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_dword v47, off, s[0:3], s32
+; ALIGNED-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:4
+; ALIGNED-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:8
+; ALIGNED-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:12
+; ALIGNED-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:16
+; ALIGNED-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:20
+; ALIGNED-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:24
+; ALIGNED-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    s_setpc_b64 s[30:31]
+;
+; UNROLL3-LABEL: memmove_p0_p0_sz2048:
+; UNROLL3:       ; %bb.0: ; %entry
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    s_mov_b32 s4, exec_lo
+; UNROLL3-NEXT:    v_cmpx_ge_u64_e64 v[2:3], v[0:1]
+; UNROLL3-NEXT:    s_xor_b32 s6, exec_lo, s4
+; UNROLL3-NEXT:    s_cbranch_execz .LBB5_4
+; UNROLL3-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0
+; UNROLL3-NEXT:    .p2align 6
+; UNROLL3-NEXT:  .LBB5_2: ; %memmove_fwd_loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    v_add_co_u32 v12, vcc_lo, v2, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo
+; UNROLL3-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; UNROLL3-NEXT:    s_clause 0x2
+; UNROLL3-NEXT:    flat_load_dwordx4 v[4:7], v[12:13]
+; UNROLL3-NEXT:    flat_load_dwordx4 v[8:11], v[12:13] offset:16
+; UNROLL3-NEXT:    flat_load_dwordx4 v[12:15], v[12:13] offset:32
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 48
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, 0
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[4:7]
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(2)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[8:11] offset:16
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(2)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[12:15] offset:32
+; UNROLL3-NEXT:    s_cmp_lg_u64 s[4:5], 0x7e0
+; UNROLL3-NEXT:    s_cbranch_scc1 .LBB5_2
+; UNROLL3-NEXT:  ; %bb.3: ; %memmove_fwd_residual
+; UNROLL3-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:2016
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:2016
+; UNROLL3-NEXT:    flat_load_dwordx4 v[2:5], v[2:3] offset:2032
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[2:5] offset:2032
+; UNROLL3-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; UNROLL3-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; UNROLL3-NEXT:  .LBB5_4: ; %Flow3
+; UNROLL3-NEXT:    s_andn2_saveexec_b32 s8, s6
+; UNROLL3-NEXT:    s_cbranch_execz .LBB5_7
+; UNROLL3-NEXT:  ; %bb.5: ; %memmove_bwd_residual
+; UNROLL3-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:2032
+; UNROLL3-NEXT:    s_movk_i32 s6, 0xffd0
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0x7b0
+; UNROLL3-NEXT:    s_mov_b32 s7, -1
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:2032
+; UNROLL3-NEXT:    flat_load_dwordx4 v[4:7], v[2:3] offset:2016
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:2016
+; UNROLL3-NEXT:    .p2align 6
+; UNROLL3-NEXT:  .LBB5_6: ; %memmove_bwd_loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    v_add_co_u32 v12, vcc_lo, v2, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo
+; UNROLL3-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; UNROLL3-NEXT:    s_clause 0x2
+; UNROLL3-NEXT:    flat_load_dwordx4 v[4:7], v[12:13]
+; UNROLL3-NEXT:    flat_load_dwordx4 v[8:11], v[12:13] offset:16
+; UNROLL3-NEXT:    flat_load_dwordx4 v[12:15], v[12:13] offset:32
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 0xffffffd0
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, -1
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2) lgkmcnt(2)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[4:7]
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1) lgkmcnt(2)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[8:11] offset:16
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(2)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[12:15] offset:32
+; UNROLL3-NEXT:    s_cmp_eq_u64 s[4:5], s[6:7]
+; UNROLL3-NEXT:    s_cbranch_scc0 .LBB5_6
+; UNROLL3-NEXT:  .LBB5_7: ; %Flow4
+; UNROLL3-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; UNROLL3-NEXT:    s_waitcnt lgkmcnt(0)
+; UNROLL3-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memmove.p0.p0.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(0) noundef nonnull align 1 %src, i64 2048, i1 false)
+  ret void
+}
+
+define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align 1 readonly %src) {
+; CHECK-LABEL: memmove_p1_p1_sz2048:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s4, exec_lo
+; CHECK-NEXT:    v_cmpx_ge_u64_e64 v[2:3], v[0:1]
+; CHECK-NEXT:    s_xor_b32 s6, exec_lo, s4
+; CHECK-NEXT:    s_cbranch_execz .LBB6_3
+; CHECK-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:  .LBB6_2: ; %memmove_fwd_loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    v_add_co_u32 v96, vcc_lo, v2, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo
+; CHECK-NEXT:    v_add_co_u32 v100, vcc_lo, v0, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo
+; CHECK-NEXT:    s_clause 0xf
+; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[96:97], off offset:224
+; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[96:97], off offset:240
+; CHECK-NEXT:    global_load_dwordx4 v[12:15], v[96:97], off offset:192
+; CHECK-NEXT:    global_load_dwordx4 v[16:19], v[96:97], off offset:208
+; CHECK-NEXT:    global_load_dwordx4 v[20:23], v[96:97], off offset:160
+; CHECK-NEXT:    global_load_dwordx4 v[24:27], v[96:97], off offset:176
+; CHECK-NEXT:    global_load_dwordx4 v[28:31], v[96:97], off offset:128
+; CHECK-NEXT:    global_load_dwordx4 v[32:35], v[96:97], off offset:144
+; CHECK-NEXT:    global_load_dwordx4 v[36:39], v[96:97], off offset:96
+; CHECK-NEXT:    global_load_dwordx4 v[48:51], v[96:97], off offset:112
+; CHECK-NEXT:    global_load_dwordx4 v[52:55], v[96:97], off offset:64
+; CHECK-NEXT:    global_load_dwordx4 v[64:67], v[96:97], off offset:80
+; CHECK-NEXT:    global_load_dwordx4 v[68:71], v[96:97], off offset:32
+; CHECK-NEXT:    global_load_dwordx4 v[80:83], v[96:97], off offset:48
+; CHECK-NEXT:    global_load_dwordx4 v[84:87], v[96:97], off
+; CHECK-NEXT:    global_load_dwordx4 v[96:99], v[96:97], off offset:16
+; CHECK-NEXT:    s_add_u32 s4, s4, 0x100
+; CHECK-NEXT:    s_addc_u32 s5, s5, 0
+; CHECK-NEXT:    s_waitcnt vmcnt(15)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[4:7], off offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(14)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[8:11], off offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(13)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[12:15], off offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(12)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[16:19], off offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(11)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[20:23], off offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(10)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[24:27], off offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(9)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[28:31], off offset:128
+; CHECK-NEXT:    s_waitcnt vmcnt(8)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[32:35], off offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(7)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[36:39], off offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(6)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[48:51], off offset:112
+; CHECK-NEXT:    s_waitcnt vmcnt(5)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[52:55], off offset:64
+; CHECK-NEXT:    s_waitcnt vmcnt(4)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[64:67], off offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(3)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[68:71], off offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(2)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[80:83], off offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[84:87], off
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[96:99], off offset:16
+; CHECK-NEXT:    s_cmp_lg_u64 s[4:5], 0x800
+; CHECK-NEXT:    s_cbranch_scc1 .LBB6_2
+; CHECK-NEXT:  .LBB6_3: ; %Flow9
+; CHECK-NEXT:    s_andn2_saveexec_b32 s8, s6
+; CHECK-NEXT:    s_cbranch_execz .LBB6_6
+; CHECK-NEXT:  ; %bb.4: ; %memmove_bwd_loop.preheader
+; CHECK-NEXT:    s_movk_i32 s6, 0xff00
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0x700
+; CHECK-NEXT:    s_mov_b32 s7, -1
+; CHECK-NEXT:  .LBB6_5: ; %memmove_bwd_loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    v_add_co_u32 v96, vcc_lo, v2, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo
+; CHECK-NEXT:    v_add_co_u32 v100, vcc_lo, v0, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo
+; CHECK-NEXT:    s_clause 0xf
+; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[96:97], off offset:224
+; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[96:97], off offset:240
+; CHECK-NEXT:    global_load_dwordx4 v[12:15], v[96:97], off offset:192
+; CHECK-NEXT:    global_load_dwordx4 v[16:19], v[96:97], off offset:208
+; CHECK-NEXT:    global_load_dwordx4 v[20:23], v[96:97], off offset:160
+; CHECK-NEXT:    global_load_dwordx4 v[24:27], v[96:97], off offset:176
+; CHECK-NEXT:    global_load_dwordx4 v[28:31], v[96:97], off offset:128
+; CHECK-NEXT:    global_load_dwordx4 v[32:35], v[96:97], off offset:144
+; CHECK-NEXT:    global_load_dwordx4 v[36:39], v[96:97], off offset:96
+; CHECK-NEXT:    global_load_dwordx4 v[48:51], v[96:97], off offset:112
+; CHECK-NEXT:    global_load_dwordx4 v[52:55], v[96:97], off offset:64
+; CHECK-NEXT:    global_load_dwordx4 v[64:67], v[96:97], off offset:80
+; CHECK-NEXT:    global_load_dwordx4 v[68:71], v[96:97], off offset:32
+; CHECK-NEXT:    global_load_dwordx4 v[80:83], v[96:97], off offset:48
+; CHECK-NEXT:    global_load_dwordx4 v[84:87], v[96:97], off
+; CHECK-NEXT:    global_load_dwordx4 v[96:99], v[96:97], off offset:16
+; CHECK-NEXT:    s_add_u32 s4, s4, 0xffffff00
+; CHECK-NEXT:    s_addc_u32 s5, s5, -1
+; CHECK-NEXT:    s_waitcnt vmcnt(15)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[4:7], off offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(14)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[8:11], off offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(13)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[12:15], off offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(12)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[16:19], off offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(11)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[20:23], off offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(10)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[24:27], off offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(9)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[28:31], off offset:128
+; CHECK-NEXT:    s_waitcnt vmcnt(8)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[32:35], off offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(7)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[36:39], off offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(6)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[48:51], off offset:112
+; CHECK-NEXT:    s_waitcnt vmcnt(5)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[52:55], off offset:64
+; CHECK-NEXT:    s_waitcnt vmcnt(4)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[64:67], off offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(3)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[68:71], off offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(2)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[80:83], off offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[84:87], off
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    global_store_dwordx4 v[100:101], v[96:99], off offset:16
+; CHECK-NEXT:    s_cmp_eq_u64 s[4:5], s[6:7]
+; CHECK-NEXT:    s_cbranch_scc0 .LBB6_5
+; CHECK-NEXT:  .LBB6_6: ; %Flow10
+; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+;
+; ALIGNED-LABEL: memmove_p1_p1_sz2048:
+; ALIGNED:       ; %bb.0: ; %entry
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_mov_b32 s4, exec_lo
+; ALIGNED-NEXT:    v_cmpx_ge_u64_e64 v[2:3], v[0:1]
+; ALIGNED-NEXT:    s_xor_b32 s6, exec_lo, s4
+; ALIGNED-NEXT:    s_cbranch_execz .LBB6_3
+; ALIGNED-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0
+; ALIGNED-NEXT:  .LBB6_2: ; %memmove_fwd_loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    v_add_co_u32 v20, vcc_lo, v2, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v21, vcc_lo, s5, v3, vcc_lo
+; ALIGNED-NEXT:    s_clause 0xf
+; ALIGNED-NEXT:    global_load_dwordx4 v[16:19], v[20:21], off offset:240
+; ALIGNED-NEXT:    global_load_dwordx4 v[22:25], v[20:21], off offset:224
+; ALIGNED-NEXT:    global_load_dwordx4 v[4:7], v[20:21], off
+; ALIGNED-NEXT:    global_load_dwordx4 v[8:11], v[20:21], off offset:16
+; ALIGNED-NEXT:    global_load_dwordx4 v[12:15], v[20:21], off offset:32
+; ALIGNED-NEXT:    global_load_dwordx4 v[98:101], v[20:21], off offset:48
+; ALIGNED-NEXT:    global_load_dwordx4 v[112:115], v[20:21], off offset:64
+; ALIGNED-NEXT:    global_load_dwordx4 v[82:85], v[20:21], off offset:80
+; ALIGNED-NEXT:    global_load_dwordx4 v[116:119], v[20:21], off offset:96
+; ALIGNED-NEXT:    global_load_dwordx4 v[66:69], v[20:21], off offset:112
+; ALIGNED-NEXT:    global_load_dwordx4 v[40:43], v[20:21], off offset:128
+; ALIGNED-NEXT:    global_load_dwordx4 v[50:53], v[20:21], off offset:144
+; ALIGNED-NEXT:    global_load_dwordx4 v[44:47], v[20:21], off offset:160
+; ALIGNED-NEXT:    global_load_dwordx4 v[34:37], v[20:21], off offset:176
+; ALIGNED-NEXT:    global_load_dwordx4 v[30:33], v[20:21], off offset:192
+; ALIGNED-NEXT:    global_load_dwordx4 v[26:29], v[20:21], off offset:208
+; ALIGNED-NEXT:    s_waitcnt vmcnt(15)
+; ALIGNED-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0x100
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, 0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v21, off offset:254
+; ALIGNED-NEXT:    global_store_byte v[16:17], v21, off offset:252
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v20, off offset:250
+; ALIGNED-NEXT:    global_store_byte v[16:17], v20, off offset:248
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v19, off offset:246
+; ALIGNED-NEXT:    global_store_byte v[16:17], v19, off offset:244
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v18, off offset:242
+; ALIGNED-NEXT:    global_store_byte v[16:17], v18, off offset:240
+; ALIGNED-NEXT:    buffer_store_dword v22, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    buffer_store_dword v23, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_store_dword v24, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_store_dword v25, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v21, 8, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v20
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v20, 8, v20
+; ALIGNED-NEXT:    s_cmp_lg_u64 s[4:5], 0x800
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v25, off offset:238
+; ALIGNED-NEXT:    global_store_byte v[16:17], v25, off offset:236
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v24, off offset:234
+; ALIGNED-NEXT:    global_store_byte v[16:17], v24, off offset:232
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v23, off offset:230
+; ALIGNED-NEXT:    global_store_byte v[16:17], v23, off offset:228
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v22, off offset:226
+; ALIGNED-NEXT:    global_store_byte v[16:17], v22, off offset:224
+; ALIGNED-NEXT:    buffer_store_dword v26, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    buffer_store_dword v27, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_store_dword v28, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_store_dword v29, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v29, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v29, off offset:222
+; ALIGNED-NEXT:    global_store_byte v[16:17], v29, off offset:220
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v28, off offset:218
+; ALIGNED-NEXT:    global_store_byte v[16:17], v28, off offset:216
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v27, off offset:214
+; ALIGNED-NEXT:    global_store_byte v[16:17], v27, off offset:212
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v26, off offset:210
+; ALIGNED-NEXT:    global_store_byte v[16:17], v26, off offset:208
+; ALIGNED-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v33, off offset:206
+; ALIGNED-NEXT:    global_store_byte v[16:17], v33, off offset:204
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v32, off offset:202
+; ALIGNED-NEXT:    global_store_byte v[16:17], v32, off offset:200
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v31, off offset:198
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:196
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v30, off offset:194
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:192
+; ALIGNED-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v37, off offset:190
+; ALIGNED-NEXT:    global_store_byte v[16:17], v37, off offset:188
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v36, off offset:186
+; ALIGNED-NEXT:    global_store_byte v[16:17], v36, off offset:184
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v35, off offset:182
+; ALIGNED-NEXT:    global_store_byte v[16:17], v35, off offset:180
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v34, off offset:178
+; ALIGNED-NEXT:    global_store_byte v[16:17], v34, off offset:176
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v49, off offset:174
+; ALIGNED-NEXT:    global_store_byte v[16:17], v49, off offset:172
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v48, off offset:170
+; ALIGNED-NEXT:    global_store_byte v[16:17], v48, off offset:168
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v39, off offset:166
+; ALIGNED-NEXT:    global_store_byte v[16:17], v39, off offset:164
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v38, off offset:162
+; ALIGNED-NEXT:    global_store_byte v[16:17], v38, off offset:160
+; ALIGNED-NEXT:    buffer_store_dword v50, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    buffer_store_dword v51, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v53, off offset:158
+; ALIGNED-NEXT:    global_store_byte v[16:17], v53, off offset:156
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v52, off offset:154
+; ALIGNED-NEXT:    global_store_byte v[16:17], v52, off offset:152
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v51, off offset:150
+; ALIGNED-NEXT:    global_store_byte v[16:17], v51, off offset:148
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v50, off offset:146
+; ALIGNED-NEXT:    global_store_byte v[16:17], v50, off offset:144
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v65, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    buffer_load_dword v64, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_load_dword v55, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_load_dword v54, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v65, off offset:142
+; ALIGNED-NEXT:    global_store_byte v[16:17], v65, off offset:140
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v64, off offset:138
+; ALIGNED-NEXT:    global_store_byte v[16:17], v64, off offset:136
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v55, off offset:134
+; ALIGNED-NEXT:    global_store_byte v[16:17], v55, off offset:132
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v54, off offset:130
+; ALIGNED-NEXT:    global_store_byte v[16:17], v54, off offset:128
+; ALIGNED-NEXT:    buffer_store_dword v66, off, s[0:3], s32 offset:192
+; ALIGNED-NEXT:    buffer_store_dword v67, off, s[0:3], s32 offset:196
+; ALIGNED-NEXT:    buffer_store_dword v68, off, s[0:3], s32 offset:200
+; ALIGNED-NEXT:    buffer_store_dword v69, off, s[0:3], s32 offset:204
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v69, off, s[0:3], s32 offset:204
+; ALIGNED-NEXT:    buffer_load_dword v68, off, s[0:3], s32 offset:200
+; ALIGNED-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:196
+; ALIGNED-NEXT:    buffer_load_dword v66, off, s[0:3], s32 offset:192
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v69, off offset:126
+; ALIGNED-NEXT:    global_store_byte v[16:17], v69, off offset:124
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v68, off offset:122
+; ALIGNED-NEXT:    global_store_byte v[16:17], v68, off offset:120
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v67, off offset:118
+; ALIGNED-NEXT:    global_store_byte v[16:17], v67, off offset:116
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v66, off offset:114
+; ALIGNED-NEXT:    global_store_byte v[16:17], v66, off offset:112
+; ALIGNED-NEXT:    buffer_store_dword v116, off, s[0:3], s32 offset:208
+; ALIGNED-NEXT:    buffer_store_dword v117, off, s[0:3], s32 offset:212
+; ALIGNED-NEXT:    buffer_store_dword v118, off, s[0:3], s32 offset:216
+; ALIGNED-NEXT:    buffer_store_dword v119, off, s[0:3], s32 offset:220
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v81, off, s[0:3], s32 offset:220
+; ALIGNED-NEXT:    buffer_load_dword v80, off, s[0:3], s32 offset:216
+; ALIGNED-NEXT:    buffer_load_dword v71, off, s[0:3], s32 offset:212
+; ALIGNED-NEXT:    buffer_load_dword v70, off, s[0:3], s32 offset:208
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v81, off offset:110
+; ALIGNED-NEXT:    global_store_byte v[16:17], v81, off offset:108
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v80, off offset:106
+; ALIGNED-NEXT:    global_store_byte v[16:17], v80, off offset:104
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v71, off offset:102
+; ALIGNED-NEXT:    global_store_byte v[16:17], v71, off offset:100
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v70, off offset:98
+; ALIGNED-NEXT:    global_store_byte v[16:17], v70, off offset:96
+; ALIGNED-NEXT:    buffer_store_dword v82, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    buffer_store_dword v83, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_store_dword v84, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_store_dword v85, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v85, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    buffer_load_dword v84, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_load_dword v83, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_load_dword v82, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v85, off offset:94
+; ALIGNED-NEXT:    global_store_byte v[16:17], v85, off offset:92
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v84, off offset:90
+; ALIGNED-NEXT:    global_store_byte v[16:17], v84, off offset:88
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v83, off offset:86
+; ALIGNED-NEXT:    global_store_byte v[16:17], v83, off offset:84
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v82, off offset:82
+; ALIGNED-NEXT:    global_store_byte v[16:17], v82, off offset:80
+; ALIGNED-NEXT:    buffer_store_dword v112, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    buffer_store_dword v113, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_store_dword v114, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_store_dword v115, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v97, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    buffer_load_dword v96, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_load_dword v87, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_load_dword v86, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v19, 8, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v18, 8, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 8, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 8, v24
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v97, off offset:78
+; ALIGNED-NEXT:    global_store_byte v[16:17], v97, off offset:76
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v96, off offset:74
+; ALIGNED-NEXT:    global_store_byte v[16:17], v96, off offset:72
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v87, off offset:70
+; ALIGNED-NEXT:    global_store_byte v[16:17], v87, off offset:68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v86, off offset:66
+; ALIGNED-NEXT:    global_store_byte v[16:17], v86, off offset:64
+; ALIGNED-NEXT:    buffer_store_dword v98, off, s[0:3], s32 offset:256
+; ALIGNED-NEXT:    buffer_store_dword v99, off, s[0:3], s32 offset:260
+; ALIGNED-NEXT:    buffer_store_dword v100, off, s[0:3], s32 offset:264
+; ALIGNED-NEXT:    buffer_store_dword v101, off, s[0:3], s32 offset:268
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v101, off, s[0:3], s32 offset:268
+; ALIGNED-NEXT:    buffer_load_dword v100, off, s[0:3], s32 offset:264
+; ALIGNED-NEXT:    buffer_load_dword v99, off, s[0:3], s32 offset:260
+; ALIGNED-NEXT:    buffer_load_dword v98, off, s[0:3], s32 offset:256
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v101, off offset:62
+; ALIGNED-NEXT:    global_store_byte v[16:17], v101, off offset:60
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v100, off offset:58
+; ALIGNED-NEXT:    global_store_byte v[16:17], v100, off offset:56
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v99, off offset:54
+; ALIGNED-NEXT:    global_store_byte v[16:17], v99, off offset:52
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v98, off offset:50
+; ALIGNED-NEXT:    global_store_byte v[16:17], v98, off offset:48
+; ALIGNED-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:272
+; ALIGNED-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:276
+; ALIGNED-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:280
+; ALIGNED-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:284
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:280
+; ALIGNED-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:284
+; ALIGNED-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:272
+; ALIGNED-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:276
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v15, off offset:42
+; ALIGNED-NEXT:    global_store_byte v[16:17], v15, off offset:40
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v14, off offset:46
+; ALIGNED-NEXT:    global_store_byte v[16:17], v14, off offset:44
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v13, off offset:34
+; ALIGNED-NEXT:    global_store_byte v[16:17], v13, off offset:32
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v12, off offset:38
+; ALIGNED-NEXT:    global_store_byte v[16:17], v12, off offset:36
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:224
+; ALIGNED-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:228
+; ALIGNED-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:232
+; ALIGNED-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:236
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:236
+; ALIGNED-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:232
+; ALIGNED-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:228
+; ALIGNED-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:224
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v11, off offset:30
+; ALIGNED-NEXT:    global_store_byte v[16:17], v11, off offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v10, off offset:26
+; ALIGNED-NEXT:    global_store_byte v[16:17], v10, off offset:24
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v9, off offset:22
+; ALIGNED-NEXT:    global_store_byte v[16:17], v9, off offset:20
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v8, off offset:18
+; ALIGNED-NEXT:    global_store_byte v[16:17], v8, off offset:16
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:240
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:244
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:248
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:252
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:252
+; ALIGNED-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:248
+; ALIGNED-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:244
+; ALIGNED-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:240
+; ALIGNED-NEXT:    global_store_byte v[16:17], v112, off offset:247
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v27
+; ALIGNED-NEXT:    global_store_byte v[16:17], v102, off offset:255
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v23
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v23, 8, v23
+; ALIGNED-NEXT:    global_store_byte v[16:17], v21, off offset:253
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v21, 24, v22
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v22, 8, v22
+; ALIGNED-NEXT:    global_store_byte v[16:17], v103, off offset:251
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v29
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 8, v29
+; ALIGNED-NEXT:    global_store_byte v[16:17], v20, off offset:249
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v20, 24, v28
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 8, v28
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v27, 8, v27
+; ALIGNED-NEXT:    global_store_byte v[16:17], v19, off offset:245
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v19, 24, v26
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v26, 8, v26
+; ALIGNED-NEXT:    global_store_byte v[16:17], v112, off offset:215
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v51
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 8, v51
+; ALIGNED-NEXT:    global_store_byte v[16:17], v113, off offset:243
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v33
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v33, 8, v33
+; ALIGNED-NEXT:    global_store_byte v[16:17], v18, off offset:241
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v18, 24, v32
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v32, 8, v32
+; ALIGNED-NEXT:    global_store_byte v[16:17], v114, off offset:239
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v31
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 8, v31
+; ALIGNED-NEXT:    global_store_byte v[16:17], v25, off offset:237
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 24, v30
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 8, v30
+; ALIGNED-NEXT:    global_store_byte v[16:17], v115, off offset:235
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v37
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v37, 8, v37
+; ALIGNED-NEXT:    global_store_byte v[16:17], v24, off offset:233
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v36
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 8, v36
+; ALIGNED-NEXT:    global_store_byte v[16:17], v102, off offset:231
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v35
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v35, 8, v35
+; ALIGNED-NEXT:    global_store_byte v[16:17], v23, off offset:229
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v23, 24, v34
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v34, 8, v34
+; ALIGNED-NEXT:    global_store_byte v[16:17], v21, off offset:227
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v21, 24, v49
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 8, v49
+; ALIGNED-NEXT:    global_store_byte v[16:17], v22, off offset:225
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v22, 24, v48
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 8, v48
+; ALIGNED-NEXT:    global_store_byte v[16:17], v103, off offset:223
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v39
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v39, 8, v39
+; ALIGNED-NEXT:    global_store_byte v[16:17], v29, off offset:221
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 24, v38
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v38, 8, v38
+; ALIGNED-NEXT:    global_store_byte v[16:17], v20, off offset:219
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v20, 24, v53
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 8, v53
+; ALIGNED-NEXT:    global_store_byte v[16:17], v28, off offset:217
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 24, v52
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 8, v52
+; ALIGNED-NEXT:    global_store_byte v[16:17], v27, off offset:213
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v27, 24, v50
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 8, v50
+; ALIGNED-NEXT:    global_store_byte v[16:17], v19, off offset:211
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v19, 24, v65
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 8, v65
+; ALIGNED-NEXT:    global_store_byte v[16:17], v26, off offset:209
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v26, 24, v64
+; ALIGNED-NEXT:    global_store_byte v[16:17], v51, off offset:149
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 24, v8
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v8, 8, v8
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 8, v64
+; ALIGNED-NEXT:    global_store_byte v[16:17], v113, off offset:207
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v55
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v55, 8, v55
+; ALIGNED-NEXT:    global_store_byte v[16:17], v33, off offset:205
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v33, 24, v54
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v54, 8, v54
+; ALIGNED-NEXT:    global_store_byte v[16:17], v18, off offset:203
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v18, 24, v69
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 8, v69
+; ALIGNED-NEXT:    global_store_byte v[16:17], v32, off offset:201
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v32, 24, v68
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 8, v68
+; ALIGNED-NEXT:    global_store_byte v[16:17], v114, off offset:199
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v67
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 8, v67
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:197
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v66
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 8, v66
+; ALIGNED-NEXT:    global_store_byte v[16:17], v25, off offset:195
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 24, v81
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v81
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:193
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v80
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 8, v80
+; ALIGNED-NEXT:    global_store_byte v[16:17], v115, off offset:191
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v71
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 8, v71
+; ALIGNED-NEXT:    global_store_byte v[16:17], v37, off offset:189
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v37, 24, v70
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 8, v70
+; ALIGNED-NEXT:    global_store_byte v[16:17], v24, off offset:187
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v85
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v85
+; ALIGNED-NEXT:    global_store_byte v[16:17], v36, off offset:185
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v84
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 8, v84
+; ALIGNED-NEXT:    global_store_byte v[16:17], v102, off offset:183
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v83
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v83
+; ALIGNED-NEXT:    global_store_byte v[16:17], v35, off offset:181
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v35, 24, v82
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 8, v82
+; ALIGNED-NEXT:    global_store_byte v[16:17], v23, off offset:179
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v23, 24, v97
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v97, 8, v97
+; ALIGNED-NEXT:    global_store_byte v[16:17], v34, off offset:177
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v34, 24, v96
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v96, 8, v96
+; ALIGNED-NEXT:    global_store_byte v[16:17], v21, off offset:175
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v21, 24, v87
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v87
+; ALIGNED-NEXT:    global_store_byte v[16:17], v49, off offset:173
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 24, v86
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 8, v86
+; ALIGNED-NEXT:    global_store_byte v[16:17], v22, off offset:171
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v22, 24, v101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 8, v101
+; ALIGNED-NEXT:    global_store_byte v[16:17], v48, off offset:169
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 24, v100
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v100
+; ALIGNED-NEXT:    global_store_byte v[16:17], v103, off offset:167
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 8, v99
+; ALIGNED-NEXT:    global_store_byte v[16:17], v39, off offset:165
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v39, 24, v98
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v98
+; ALIGNED-NEXT:    global_store_byte v[16:17], v29, off offset:163
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 24, v15
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v15, 8, v15
+; ALIGNED-NEXT:    global_store_byte v[16:17], v38, off offset:161
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v38, 24, v14
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v14, 8, v14
+; ALIGNED-NEXT:    global_store_byte v[16:17], v20, off offset:159
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v20, 24, v13
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v13, 8, v13
+; ALIGNED-NEXT:    global_store_byte v[16:17], v53, off offset:157
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 24, v12
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v12, 8, v12
+; ALIGNED-NEXT:    global_store_byte v[16:17], v28, off offset:155
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 24, v11
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v11, 8, v11
+; ALIGNED-NEXT:    global_store_byte v[16:17], v52, off offset:153
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 24, v10
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v10, 8, v10
+; ALIGNED-NEXT:    global_store_byte v[16:17], v112, off offset:151
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v9
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v9, 8, v9
+; ALIGNED-NEXT:    global_store_byte v[16:17], v27, off offset:147
+; ALIGNED-NEXT:    global_store_byte v[16:17], v50, off offset:145
+; ALIGNED-NEXT:    global_store_byte v[16:17], v19, off offset:143
+; ALIGNED-NEXT:    global_store_byte v[16:17], v65, off offset:141
+; ALIGNED-NEXT:    global_store_byte v[16:17], v26, off offset:139
+; ALIGNED-NEXT:    global_store_byte v[16:17], v64, off offset:137
+; ALIGNED-NEXT:    global_store_byte v[16:17], v113, off offset:135
+; ALIGNED-NEXT:    global_store_byte v[16:17], v55, off offset:133
+; ALIGNED-NEXT:    global_store_byte v[16:17], v33, off offset:131
+; ALIGNED-NEXT:    global_store_byte v[16:17], v54, off offset:129
+; ALIGNED-NEXT:    global_store_byte v[16:17], v18, off offset:127
+; ALIGNED-NEXT:    global_store_byte v[16:17], v69, off offset:125
+; ALIGNED-NEXT:    global_store_byte v[16:17], v32, off offset:123
+; ALIGNED-NEXT:    global_store_byte v[16:17], v68, off offset:121
+; ALIGNED-NEXT:    global_store_byte v[16:17], v114, off offset:119
+; ALIGNED-NEXT:    global_store_byte v[16:17], v67, off offset:117
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:115
+; ALIGNED-NEXT:    global_store_byte v[16:17], v66, off offset:113
+; ALIGNED-NEXT:    global_store_byte v[16:17], v25, off offset:111
+; ALIGNED-NEXT:    global_store_byte v[16:17], v81, off offset:109
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:107
+; ALIGNED-NEXT:    global_store_byte v[16:17], v80, off offset:105
+; ALIGNED-NEXT:    global_store_byte v[16:17], v115, off offset:103
+; ALIGNED-NEXT:    global_store_byte v[16:17], v71, off offset:101
+; ALIGNED-NEXT:    global_store_byte v[16:17], v37, off offset:99
+; ALIGNED-NEXT:    global_store_byte v[16:17], v70, off offset:97
+; ALIGNED-NEXT:    global_store_byte v[16:17], v24, off offset:95
+; ALIGNED-NEXT:    global_store_byte v[16:17], v85, off offset:93
+; ALIGNED-NEXT:    global_store_byte v[16:17], v36, off offset:91
+; ALIGNED-NEXT:    global_store_byte v[16:17], v84, off offset:89
+; ALIGNED-NEXT:    global_store_byte v[16:17], v102, off offset:87
+; ALIGNED-NEXT:    global_store_byte v[16:17], v83, off offset:85
+; ALIGNED-NEXT:    global_store_byte v[16:17], v35, off offset:83
+; ALIGNED-NEXT:    global_store_byte v[16:17], v82, off offset:81
+; ALIGNED-NEXT:    global_store_byte v[16:17], v23, off offset:79
+; ALIGNED-NEXT:    global_store_byte v[16:17], v97, off offset:77
+; ALIGNED-NEXT:    global_store_byte v[16:17], v34, off offset:75
+; ALIGNED-NEXT:    global_store_byte v[16:17], v96, off offset:73
+; ALIGNED-NEXT:    global_store_byte v[16:17], v21, off offset:71
+; ALIGNED-NEXT:    global_store_byte v[16:17], v87, off offset:69
+; ALIGNED-NEXT:    global_store_byte v[16:17], v49, off offset:67
+; ALIGNED-NEXT:    global_store_byte v[16:17], v86, off offset:65
+; ALIGNED-NEXT:    global_store_byte v[16:17], v22, off offset:63
+; ALIGNED-NEXT:    global_store_byte v[16:17], v101, off offset:61
+; ALIGNED-NEXT:    global_store_byte v[16:17], v48, off offset:59
+; ALIGNED-NEXT:    global_store_byte v[16:17], v100, off offset:57
+; ALIGNED-NEXT:    global_store_byte v[16:17], v103, off offset:55
+; ALIGNED-NEXT:    global_store_byte v[16:17], v99, off offset:53
+; ALIGNED-NEXT:    global_store_byte v[16:17], v39, off offset:51
+; ALIGNED-NEXT:    global_store_byte v[16:17], v98, off offset:49
+; ALIGNED-NEXT:    global_store_byte v[16:17], v29, off offset:43
+; ALIGNED-NEXT:    global_store_byte v[16:17], v15, off offset:41
+; ALIGNED-NEXT:    global_store_byte v[16:17], v38, off offset:47
+; ALIGNED-NEXT:    global_store_byte v[16:17], v14, off offset:45
+; ALIGNED-NEXT:    global_store_byte v[16:17], v20, off offset:35
+; ALIGNED-NEXT:    global_store_byte v[16:17], v13, off offset:33
+; ALIGNED-NEXT:    global_store_byte v[16:17], v53, off offset:39
+; ALIGNED-NEXT:    global_store_byte v[16:17], v12, off offset:37
+; ALIGNED-NEXT:    global_store_byte v[16:17], v28, off offset:31
+; ALIGNED-NEXT:    global_store_byte v[16:17], v11, off offset:29
+; ALIGNED-NEXT:    global_store_byte v[16:17], v52, off offset:27
+; ALIGNED-NEXT:    global_store_byte v[16:17], v10, off offset:25
+; ALIGNED-NEXT:    global_store_byte v[16:17], v112, off offset:23
+; ALIGNED-NEXT:    global_store_byte v[16:17], v9, off offset:21
+; ALIGNED-NEXT:    global_store_byte v[16:17], v51, off offset:19
+; ALIGNED-NEXT:    global_store_byte v[16:17], v8, off offset:17
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v7, off offset:14
+; ALIGNED-NEXT:    global_store_byte v[16:17], v7, off offset:12
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v6, off offset:10
+; ALIGNED-NEXT:    global_store_byte v[16:17], v6, off offset:8
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v5, off offset:6
+; ALIGNED-NEXT:    global_store_byte v[16:17], v5, off offset:4
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v4, off offset:2
+; ALIGNED-NEXT:    global_store_byte v[16:17], v4, off
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v8, 24, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v9, 24, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v6, 8, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v10, 24, v5
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v5, 8, v5
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v11, 24, v4
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v4, 8, v4
+; ALIGNED-NEXT:    global_store_byte v[16:17], v8, off offset:15
+; ALIGNED-NEXT:    global_store_byte v[16:17], v7, off offset:13
+; ALIGNED-NEXT:    global_store_byte v[16:17], v9, off offset:11
+; ALIGNED-NEXT:    global_store_byte v[16:17], v6, off offset:9
+; ALIGNED-NEXT:    global_store_byte v[16:17], v10, off offset:7
+; ALIGNED-NEXT:    global_store_byte v[16:17], v5, off offset:5
+; ALIGNED-NEXT:    global_store_byte v[16:17], v11, off offset:3
+; ALIGNED-NEXT:    global_store_byte v[16:17], v4, off offset:1
+; ALIGNED-NEXT:    s_cbranch_scc1 .LBB6_2
+; ALIGNED-NEXT:  .LBB6_3: ; %Flow9
+; ALIGNED-NEXT:    s_andn2_saveexec_b32 s8, s6
+; ALIGNED-NEXT:    s_cbranch_execz .LBB6_6
+; ALIGNED-NEXT:  ; %bb.4: ; %memmove_bwd_loop.preheader
+; ALIGNED-NEXT:    s_movk_i32 s6, 0xff00
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0x700
+; ALIGNED-NEXT:    s_mov_b32 s7, -1
+; ALIGNED-NEXT:  .LBB6_5: ; %memmove_bwd_loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    v_add_co_u32 v24, vcc_lo, v2, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v25, vcc_lo, s5, v3, vcc_lo
+; ALIGNED-NEXT:    s_clause 0xf
+; ALIGNED-NEXT:    global_load_dwordx4 v[16:19], v[24:25], off offset:240
+; ALIGNED-NEXT:    global_load_dwordx4 v[20:23], v[24:25], off offset:224
+; ALIGNED-NEXT:    global_load_dwordx4 v[4:7], v[24:25], off
+; ALIGNED-NEXT:    global_load_dwordx4 v[8:11], v[24:25], off offset:16
+; ALIGNED-NEXT:    global_load_dwordx4 v[12:15], v[24:25], off offset:32
+; ALIGNED-NEXT:    global_load_dwordx4 v[112:115], v[24:25], off offset:48
+; ALIGNED-NEXT:    global_load_dwordx4 v[116:119], v[24:25], off offset:64
+; ALIGNED-NEXT:    global_load_dwordx4 v[40:43], v[24:25], off offset:80
+; ALIGNED-NEXT:    global_load_dwordx4 v[26:29], v[24:25], off offset:96
+; ALIGNED-NEXT:    global_load_dwordx4 v[32:35], v[24:25], off offset:112
+; ALIGNED-NEXT:    global_load_dwordx4 v[44:47], v[24:25], off offset:128
+; ALIGNED-NEXT:    global_load_dwordx4 v[52:55], v[24:25], off offset:144
+; ALIGNED-NEXT:    global_load_dwordx4 v[66:69], v[24:25], off offset:160
+; ALIGNED-NEXT:    global_load_dwordx4 v[81:84], v[24:25], off offset:176
+; ALIGNED-NEXT:    global_load_dwordx4 v[96:99], v[24:25], off offset:192
+; ALIGNED-NEXT:    global_load_dwordx4 v[100:103], v[24:25], off offset:208
+; ALIGNED-NEXT:    s_waitcnt vmcnt(15)
+; ALIGNED-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:320
+; ALIGNED-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:324
+; ALIGNED-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:328
+; ALIGNED-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:332
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:332
+; ALIGNED-NEXT:    buffer_load_dword v30, off, s[0:3], s32 offset:328
+; ALIGNED-NEXT:    buffer_load_dword v25, off, s[0:3], s32 offset:324
+; ALIGNED-NEXT:    buffer_load_dword v24, off, s[0:3], s32 offset:320
+; ALIGNED-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0xffffff00
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, -1
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v31, off offset:254
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:252
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v30, off offset:250
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:248
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v25, off offset:246
+; ALIGNED-NEXT:    global_store_byte v[16:17], v25, off offset:244
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v24, off offset:242
+; ALIGNED-NEXT:    global_store_byte v[16:17], v24, off offset:240
+; ALIGNED-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:336
+; ALIGNED-NEXT:    buffer_store_dword v21, off, s[0:3], s32 offset:340
+; ALIGNED-NEXT:    buffer_store_dword v22, off, s[0:3], s32 offset:344
+; ALIGNED-NEXT:    buffer_store_dword v23, off, s[0:3], s32 offset:348
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v51, off, s[0:3], s32 offset:348
+; ALIGNED-NEXT:    buffer_load_dword v50, off, s[0:3], s32 offset:344
+; ALIGNED-NEXT:    buffer_load_dword v49, off, s[0:3], s32 offset:340
+; ALIGNED-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:336
+; ALIGNED-NEXT:    s_cmp_eq_u64 s[4:5], s[6:7]
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v51, off offset:238
+; ALIGNED-NEXT:    global_store_byte v[16:17], v51, off offset:236
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v50, off offset:234
+; ALIGNED-NEXT:    global_store_byte v[16:17], v50, off offset:232
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v49, off offset:230
+; ALIGNED-NEXT:    global_store_byte v[16:17], v49, off offset:228
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v36, off offset:226
+; ALIGNED-NEXT:    global_store_byte v[16:17], v36, off offset:224
+; ALIGNED-NEXT:    buffer_store_dword v100, off, s[0:3], s32 offset:288
+; ALIGNED-NEXT:    buffer_store_dword v101, off, s[0:3], s32 offset:292
+; ALIGNED-NEXT:    buffer_store_dword v102, off, s[0:3], s32 offset:296
+; ALIGNED-NEXT:    buffer_store_dword v103, off, s[0:3], s32 offset:300
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v71, off, s[0:3], s32 offset:300
+; ALIGNED-NEXT:    buffer_load_dword v70, off, s[0:3], s32 offset:296
+; ALIGNED-NEXT:    buffer_load_dword v65, off, s[0:3], s32 offset:292
+; ALIGNED-NEXT:    buffer_load_dword v64, off, s[0:3], s32 offset:288
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v31
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 8, v31
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v30
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 8, v30
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v71, off offset:222
+; ALIGNED-NEXT:    global_store_byte v[16:17], v71, off offset:220
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v70, off offset:218
+; ALIGNED-NEXT:    global_store_byte v[16:17], v70, off offset:216
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v65, off offset:214
+; ALIGNED-NEXT:    global_store_byte v[16:17], v65, off offset:212
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v64, off offset:210
+; ALIGNED-NEXT:    global_store_byte v[16:17], v64, off offset:208
+; ALIGNED-NEXT:    buffer_store_dword v96, off, s[0:3], s32 offset:304
+; ALIGNED-NEXT:    buffer_store_dword v97, off, s[0:3], s32 offset:308
+; ALIGNED-NEXT:    buffer_store_dword v98, off, s[0:3], s32 offset:312
+; ALIGNED-NEXT:    buffer_store_dword v99, off, s[0:3], s32 offset:316
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v87, off, s[0:3], s32 offset:316
+; ALIGNED-NEXT:    buffer_load_dword v86, off, s[0:3], s32 offset:312
+; ALIGNED-NEXT:    buffer_load_dword v85, off, s[0:3], s32 offset:308
+; ALIGNED-NEXT:    buffer_load_dword v80, off, s[0:3], s32 offset:304
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v87, off offset:206
+; ALIGNED-NEXT:    global_store_byte v[16:17], v87, off offset:204
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v86, off offset:202
+; ALIGNED-NEXT:    global_store_byte v[16:17], v86, off offset:200
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v85, off offset:198
+; ALIGNED-NEXT:    global_store_byte v[16:17], v85, off offset:196
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v80, off offset:194
+; ALIGNED-NEXT:    global_store_byte v[16:17], v80, off offset:192
+; ALIGNED-NEXT:    buffer_store_dword v81, off, s[0:3], s32 offset:384
+; ALIGNED-NEXT:    buffer_store_dword v82, off, s[0:3], s32 offset:388
+; ALIGNED-NEXT:    buffer_store_dword v83, off, s[0:3], s32 offset:392
+; ALIGNED-NEXT:    buffer_store_dword v84, off, s[0:3], s32 offset:396
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v101, off, s[0:3], s32 offset:396
+; ALIGNED-NEXT:    buffer_load_dword v99, off, s[0:3], s32 offset:392
+; ALIGNED-NEXT:    buffer_load_dword v96, off, s[0:3], s32 offset:388
+; ALIGNED-NEXT:    buffer_load_dword v81, off, s[0:3], s32 offset:384
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v101, off offset:190
+; ALIGNED-NEXT:    global_store_byte v[16:17], v101, off offset:188
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v99, off offset:186
+; ALIGNED-NEXT:    global_store_byte v[16:17], v99, off offset:184
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v96, off offset:182
+; ALIGNED-NEXT:    global_store_byte v[16:17], v96, off offset:180
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v81, off offset:178
+; ALIGNED-NEXT:    global_store_byte v[16:17], v81, off offset:176
+; ALIGNED-NEXT:    buffer_store_dword v66, off, s[0:3], s32 offset:400
+; ALIGNED-NEXT:    buffer_store_dword v67, off, s[0:3], s32 offset:404
+; ALIGNED-NEXT:    buffer_store_dword v68, off, s[0:3], s32 offset:408
+; ALIGNED-NEXT:    buffer_store_dword v69, off, s[0:3], s32 offset:412
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v100, off, s[0:3], s32 offset:412
+; ALIGNED-NEXT:    buffer_load_dword v97, off, s[0:3], s32 offset:408
+; ALIGNED-NEXT:    buffer_load_dword v82, off, s[0:3], s32 offset:404
+; ALIGNED-NEXT:    buffer_load_dword v66, off, s[0:3], s32 offset:400
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v100, off offset:174
+; ALIGNED-NEXT:    global_store_byte v[16:17], v100, off offset:172
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v97, off offset:170
+; ALIGNED-NEXT:    global_store_byte v[16:17], v97, off offset:168
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v82, off offset:166
+; ALIGNED-NEXT:    global_store_byte v[16:17], v82, off offset:164
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v66, off offset:162
+; ALIGNED-NEXT:    global_store_byte v[16:17], v66, off offset:160
+; ALIGNED-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:352
+; ALIGNED-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:356
+; ALIGNED-NEXT:    buffer_store_dword v54, off, s[0:3], s32 offset:360
+; ALIGNED-NEXT:    buffer_store_dword v55, off, s[0:3], s32 offset:364
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v98, off, s[0:3], s32 offset:364
+; ALIGNED-NEXT:    buffer_load_dword v83, off, s[0:3], s32 offset:360
+; ALIGNED-NEXT:    buffer_load_dword v67, off, s[0:3], s32 offset:356
+; ALIGNED-NEXT:    buffer_load_dword v52, off, s[0:3], s32 offset:352
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v98, off offset:158
+; ALIGNED-NEXT:    global_store_byte v[16:17], v98, off offset:156
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v83, off offset:154
+; ALIGNED-NEXT:    global_store_byte v[16:17], v83, off offset:152
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v67, off offset:150
+; ALIGNED-NEXT:    global_store_byte v[16:17], v67, off offset:148
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v52, off offset:146
+; ALIGNED-NEXT:    global_store_byte v[16:17], v52, off offset:144
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:368
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:372
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:376
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:380
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v84, off, s[0:3], s32 offset:380
+; ALIGNED-NEXT:    buffer_load_dword v68, off, s[0:3], s32 offset:376
+; ALIGNED-NEXT:    buffer_load_dword v53, off, s[0:3], s32 offset:372
+; ALIGNED-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:368
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v84, off offset:142
+; ALIGNED-NEXT:    global_store_byte v[16:17], v84, off offset:140
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v68, off offset:138
+; ALIGNED-NEXT:    global_store_byte v[16:17], v68, off offset:136
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v53, off offset:134
+; ALIGNED-NEXT:    global_store_byte v[16:17], v53, off offset:132
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v37, off offset:130
+; ALIGNED-NEXT:    global_store_byte v[16:17], v37, off offset:128
+; ALIGNED-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:448
+; ALIGNED-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:452
+; ALIGNED-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:456
+; ALIGNED-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:460
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v69, off, s[0:3], s32 offset:460
+; ALIGNED-NEXT:    buffer_load_dword v54, off, s[0:3], s32 offset:456
+; ALIGNED-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:452
+; ALIGNED-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:448
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v69, off offset:126
+; ALIGNED-NEXT:    global_store_byte v[16:17], v69, off offset:124
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v54, off offset:122
+; ALIGNED-NEXT:    global_store_byte v[16:17], v54, off offset:120
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v38, off offset:118
+; ALIGNED-NEXT:    global_store_byte v[16:17], v38, off offset:116
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v32, off offset:114
+; ALIGNED-NEXT:    global_store_byte v[16:17], v32, off offset:112
+; ALIGNED-NEXT:    buffer_store_dword v26, off, s[0:3], s32 offset:464
+; ALIGNED-NEXT:    buffer_store_dword v27, off, s[0:3], s32 offset:468
+; ALIGNED-NEXT:    buffer_store_dword v28, off, s[0:3], s32 offset:472
+; ALIGNED-NEXT:    buffer_store_dword v29, off, s[0:3], s32 offset:476
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v55, off, s[0:3], s32 offset:476
+; ALIGNED-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:472
+; ALIGNED-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:468
+; ALIGNED-NEXT:    buffer_load_dword v26, off, s[0:3], s32 offset:464
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v55, off offset:110
+; ALIGNED-NEXT:    global_store_byte v[16:17], v55, off offset:108
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v39, off offset:106
+; ALIGNED-NEXT:    global_store_byte v[16:17], v39, off offset:104
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v33, off offset:102
+; ALIGNED-NEXT:    global_store_byte v[16:17], v33, off offset:100
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v26, off offset:98
+; ALIGNED-NEXT:    global_store_byte v[16:17], v26, off offset:96
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:416
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:420
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:424
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:428
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v48, off, s[0:3], s32 offset:428
+; ALIGNED-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:424
+; ALIGNED-NEXT:    buffer_load_dword v27, off, s[0:3], s32 offset:420
+; ALIGNED-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:416
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v48, off offset:94
+; ALIGNED-NEXT:    global_store_byte v[16:17], v48, off offset:92
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v34, off offset:90
+; ALIGNED-NEXT:    global_store_byte v[16:17], v34, off offset:88
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v27, off offset:86
+; ALIGNED-NEXT:    global_store_byte v[16:17], v27, off offset:84
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v21, off offset:82
+; ALIGNED-NEXT:    global_store_byte v[16:17], v21, off offset:80
+; ALIGNED-NEXT:    buffer_store_dword v116, off, s[0:3], s32 offset:432
+; ALIGNED-NEXT:    buffer_store_dword v117, off, s[0:3], s32 offset:436
+; ALIGNED-NEXT:    buffer_store_dword v118, off, s[0:3], s32 offset:440
+; ALIGNED-NEXT:    buffer_store_dword v119, off, s[0:3], s32 offset:444
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:444
+; ALIGNED-NEXT:    buffer_load_dword v28, off, s[0:3], s32 offset:440
+; ALIGNED-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:436
+; ALIGNED-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:432
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v35, off offset:78
+; ALIGNED-NEXT:    global_store_byte v[16:17], v35, off offset:76
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v28, off offset:74
+; ALIGNED-NEXT:    global_store_byte v[16:17], v28, off offset:72
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v22, off offset:70
+; ALIGNED-NEXT:    global_store_byte v[16:17], v22, off offset:68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v19, off offset:66
+; ALIGNED-NEXT:    global_store_byte v[16:17], v19, off offset:64
+; ALIGNED-NEXT:    buffer_store_dword v112, off, s[0:3], s32 offset:512
+; ALIGNED-NEXT:    buffer_store_dword v113, off, s[0:3], s32 offset:516
+; ALIGNED-NEXT:    buffer_store_dword v114, off, s[0:3], s32 offset:520
+; ALIGNED-NEXT:    buffer_store_dword v115, off, s[0:3], s32 offset:524
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v29, off, s[0:3], s32 offset:524
+; ALIGNED-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:520
+; ALIGNED-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:516
+; ALIGNED-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:512
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 8, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 8, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v51
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 8, v51
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v50
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 8, v50
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v29, off offset:62
+; ALIGNED-NEXT:    global_store_byte v[16:17], v29, off offset:60
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v23, off offset:58
+; ALIGNED-NEXT:    global_store_byte v[16:17], v23, off offset:56
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v20, off offset:54
+; ALIGNED-NEXT:    global_store_byte v[16:17], v20, off offset:52
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v18, off offset:50
+; ALIGNED-NEXT:    global_store_byte v[16:17], v18, off offset:48
+; ALIGNED-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:528
+; ALIGNED-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:532
+; ALIGNED-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:536
+; ALIGNED-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:540
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:536
+; ALIGNED-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:540
+; ALIGNED-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:528
+; ALIGNED-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:532
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v15, off offset:42
+; ALIGNED-NEXT:    global_store_byte v[16:17], v15, off offset:40
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v14, off offset:46
+; ALIGNED-NEXT:    global_store_byte v[16:17], v14, off offset:44
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v13, off offset:34
+; ALIGNED-NEXT:    global_store_byte v[16:17], v13, off offset:32
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v12, off offset:38
+; ALIGNED-NEXT:    global_store_byte v[16:17], v12, off offset:36
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:480
+; ALIGNED-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:484
+; ALIGNED-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:488
+; ALIGNED-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:492
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:492
+; ALIGNED-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:488
+; ALIGNED-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:484
+; ALIGNED-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:480
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v11, off offset:30
+; ALIGNED-NEXT:    global_store_byte v[16:17], v11, off offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v10, off offset:26
+; ALIGNED-NEXT:    global_store_byte v[16:17], v10, off offset:24
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v9, off offset:22
+; ALIGNED-NEXT:    global_store_byte v[16:17], v9, off offset:20
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v8, off offset:18
+; ALIGNED-NEXT:    global_store_byte v[16:17], v8, off offset:16
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:496
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:500
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:504
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:508
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:508
+; ALIGNED-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:504
+; ALIGNED-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:500
+; ALIGNED-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:496
+; ALIGNED-NEXT:    global_store_byte v[16:17], v112, off offset:247
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v65
+; ALIGNED-NEXT:    global_store_byte v[16:17], v102, off offset:255
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v49
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 8, v49
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:253
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v36
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 8, v36
+; ALIGNED-NEXT:    global_store_byte v[16:17], v103, off offset:251
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v71
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 8, v71
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:249
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v70
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 8, v70
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 8, v65
+; ALIGNED-NEXT:    global_store_byte v[16:17], v25, off offset:245
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 24, v64
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 8, v64
+; ALIGNED-NEXT:    global_store_byte v[16:17], v112, off offset:215
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v67
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 8, v67
+; ALIGNED-NEXT:    global_store_byte v[16:17], v113, off offset:243
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v87
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v87
+; ALIGNED-NEXT:    global_store_byte v[16:17], v24, off offset:241
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v86
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 8, v86
+; ALIGNED-NEXT:    global_store_byte v[16:17], v114, off offset:239
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v85
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v85
+; ALIGNED-NEXT:    global_store_byte v[16:17], v51, off offset:237
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 24, v80
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 8, v80
+; ALIGNED-NEXT:    global_store_byte v[16:17], v115, off offset:235
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 8, v101
+; ALIGNED-NEXT:    global_store_byte v[16:17], v50, off offset:233
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 24, v99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 8, v99
+; ALIGNED-NEXT:    global_store_byte v[16:17], v102, off offset:231
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v96
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v96, 8, v96
+; ALIGNED-NEXT:    global_store_byte v[16:17], v49, off offset:229
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 24, v81
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v81
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:227
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v100
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v100
+; ALIGNED-NEXT:    global_store_byte v[16:17], v36, off offset:225
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v97
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v97, 8, v97
+; ALIGNED-NEXT:    global_store_byte v[16:17], v103, off offset:223
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v82
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 8, v82
+; ALIGNED-NEXT:    global_store_byte v[16:17], v71, off offset:221
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 24, v66
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 8, v66
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:219
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v98
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v98
+; ALIGNED-NEXT:    global_store_byte v[16:17], v70, off offset:217
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v83
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v83
+; ALIGNED-NEXT:    global_store_byte v[16:17], v65, off offset:213
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 24, v52
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 8, v52
+; ALIGNED-NEXT:    global_store_byte v[16:17], v25, off offset:211
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 24, v84
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 8, v84
+; ALIGNED-NEXT:    global_store_byte v[16:17], v64, off offset:209
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 24, v68
+; ALIGNED-NEXT:    global_store_byte v[16:17], v67, off offset:149
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 24, v8
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v8, 8, v8
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 8, v68
+; ALIGNED-NEXT:    global_store_byte v[16:17], v113, off offset:207
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v53
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 8, v53
+; ALIGNED-NEXT:    global_store_byte v[16:17], v87, off offset:205
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 24, v37
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v37, 8, v37
+; ALIGNED-NEXT:    global_store_byte v[16:17], v24, off offset:203
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v69
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 8, v69
+; ALIGNED-NEXT:    global_store_byte v[16:17], v86, off offset:201
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v54
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v54, 8, v54
+; ALIGNED-NEXT:    global_store_byte v[16:17], v114, off offset:199
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v38
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v38, 8, v38
+; ALIGNED-NEXT:    global_store_byte v[16:17], v85, off offset:197
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 24, v32
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v32, 8, v32
+; ALIGNED-NEXT:    global_store_byte v[16:17], v51, off offset:195
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 24, v55
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v55, 8, v55
+; ALIGNED-NEXT:    global_store_byte v[16:17], v80, off offset:193
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v39
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v39, 8, v39
+; ALIGNED-NEXT:    global_store_byte v[16:17], v115, off offset:191
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 24, v33
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v33, 8, v33
+; ALIGNED-NEXT:    global_store_byte v[16:17], v101, off offset:189
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v26
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v26, 8, v26
+; ALIGNED-NEXT:    global_store_byte v[16:17], v50, off offset:187
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 24, v48
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 8, v48
+; ALIGNED-NEXT:    global_store_byte v[16:17], v99, off offset:185
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v34
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v34, 8, v34
+; ALIGNED-NEXT:    global_store_byte v[16:17], v102, off offset:183
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v27
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v27, 8, v27
+; ALIGNED-NEXT:    global_store_byte v[16:17], v96, off offset:181
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v96, 24, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v21, 8, v21
+; ALIGNED-NEXT:    global_store_byte v[16:17], v49, off offset:179
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 24, v35
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v35, 8, v35
+; ALIGNED-NEXT:    global_store_byte v[16:17], v81, off offset:177
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 24, v28
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 8, v28
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:175
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 24, v22
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v22, 8, v22
+; ALIGNED-NEXT:    global_store_byte v[16:17], v100, off offset:173
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 24, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v19, 8, v19
+; ALIGNED-NEXT:    global_store_byte v[16:17], v36, off offset:171
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v29
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 8, v29
+; ALIGNED-NEXT:    global_store_byte v[16:17], v97, off offset:169
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v97, 24, v23
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v23, 8, v23
+; ALIGNED-NEXT:    global_store_byte v[16:17], v103, off offset:167
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v20
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v20, 8, v20
+; ALIGNED-NEXT:    global_store_byte v[16:17], v82, off offset:165
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v18, 8, v18
+; ALIGNED-NEXT:    global_store_byte v[16:17], v71, off offset:163
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 24, v15
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v15, 8, v15
+; ALIGNED-NEXT:    global_store_byte v[16:17], v66, off offset:161
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 24, v14
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v14, 8, v14
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:159
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v13
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v13, 8, v13
+; ALIGNED-NEXT:    global_store_byte v[16:17], v98, off offset:157
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 24, v12
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v12, 8, v12
+; ALIGNED-NEXT:    global_store_byte v[16:17], v70, off offset:155
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v11
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v11, 8, v11
+; ALIGNED-NEXT:    global_store_byte v[16:17], v83, off offset:153
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 24, v10
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v10, 8, v10
+; ALIGNED-NEXT:    global_store_byte v[16:17], v112, off offset:151
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 24, v9
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v9, 8, v9
+; ALIGNED-NEXT:    global_store_byte v[16:17], v65, off offset:147
+; ALIGNED-NEXT:    global_store_byte v[16:17], v52, off offset:145
+; ALIGNED-NEXT:    global_store_byte v[16:17], v25, off offset:143
+; ALIGNED-NEXT:    global_store_byte v[16:17], v84, off offset:141
+; ALIGNED-NEXT:    global_store_byte v[16:17], v64, off offset:139
+; ALIGNED-NEXT:    global_store_byte v[16:17], v68, off offset:137
+; ALIGNED-NEXT:    global_store_byte v[16:17], v113, off offset:135
+; ALIGNED-NEXT:    global_store_byte v[16:17], v53, off offset:133
+; ALIGNED-NEXT:    global_store_byte v[16:17], v87, off offset:131
+; ALIGNED-NEXT:    global_store_byte v[16:17], v37, off offset:129
+; ALIGNED-NEXT:    global_store_byte v[16:17], v24, off offset:127
+; ALIGNED-NEXT:    global_store_byte v[16:17], v69, off offset:125
+; ALIGNED-NEXT:    global_store_byte v[16:17], v86, off offset:123
+; ALIGNED-NEXT:    global_store_byte v[16:17], v54, off offset:121
+; ALIGNED-NEXT:    global_store_byte v[16:17], v114, off offset:119
+; ALIGNED-NEXT:    global_store_byte v[16:17], v38, off offset:117
+; ALIGNED-NEXT:    global_store_byte v[16:17], v85, off offset:115
+; ALIGNED-NEXT:    global_store_byte v[16:17], v32, off offset:113
+; ALIGNED-NEXT:    global_store_byte v[16:17], v51, off offset:111
+; ALIGNED-NEXT:    global_store_byte v[16:17], v55, off offset:109
+; ALIGNED-NEXT:    global_store_byte v[16:17], v80, off offset:107
+; ALIGNED-NEXT:    global_store_byte v[16:17], v39, off offset:105
+; ALIGNED-NEXT:    global_store_byte v[16:17], v115, off offset:103
+; ALIGNED-NEXT:    global_store_byte v[16:17], v33, off offset:101
+; ALIGNED-NEXT:    global_store_byte v[16:17], v101, off offset:99
+; ALIGNED-NEXT:    global_store_byte v[16:17], v26, off offset:97
+; ALIGNED-NEXT:    global_store_byte v[16:17], v50, off offset:95
+; ALIGNED-NEXT:    global_store_byte v[16:17], v48, off offset:93
+; ALIGNED-NEXT:    global_store_byte v[16:17], v99, off offset:91
+; ALIGNED-NEXT:    global_store_byte v[16:17], v34, off offset:89
+; ALIGNED-NEXT:    global_store_byte v[16:17], v102, off offset:87
+; ALIGNED-NEXT:    global_store_byte v[16:17], v27, off offset:85
+; ALIGNED-NEXT:    global_store_byte v[16:17], v96, off offset:83
+; ALIGNED-NEXT:    global_store_byte v[16:17], v21, off offset:81
+; ALIGNED-NEXT:    global_store_byte v[16:17], v49, off offset:79
+; ALIGNED-NEXT:    global_store_byte v[16:17], v35, off offset:77
+; ALIGNED-NEXT:    global_store_byte v[16:17], v81, off offset:75
+; ALIGNED-NEXT:    global_store_byte v[16:17], v28, off offset:73
+; ALIGNED-NEXT:    global_store_byte v[16:17], v31, off offset:71
+; ALIGNED-NEXT:    global_store_byte v[16:17], v22, off offset:69
+; ALIGNED-NEXT:    global_store_byte v[16:17], v100, off offset:67
+; ALIGNED-NEXT:    global_store_byte v[16:17], v19, off offset:65
+; ALIGNED-NEXT:    global_store_byte v[16:17], v36, off offset:63
+; ALIGNED-NEXT:    global_store_byte v[16:17], v29, off offset:61
+; ALIGNED-NEXT:    global_store_byte v[16:17], v97, off offset:59
+; ALIGNED-NEXT:    global_store_byte v[16:17], v23, off offset:57
+; ALIGNED-NEXT:    global_store_byte v[16:17], v103, off offset:55
+; ALIGNED-NEXT:    global_store_byte v[16:17], v20, off offset:53
+; ALIGNED-NEXT:    global_store_byte v[16:17], v82, off offset:51
+; ALIGNED-NEXT:    global_store_byte v[16:17], v18, off offset:49
+; ALIGNED-NEXT:    global_store_byte v[16:17], v71, off offset:43
+; ALIGNED-NEXT:    global_store_byte v[16:17], v15, off offset:41
+; ALIGNED-NEXT:    global_store_byte v[16:17], v66, off offset:47
+; ALIGNED-NEXT:    global_store_byte v[16:17], v14, off offset:45
+; ALIGNED-NEXT:    global_store_byte v[16:17], v30, off offset:35
+; ALIGNED-NEXT:    global_store_byte v[16:17], v13, off offset:33
+; ALIGNED-NEXT:    global_store_byte v[16:17], v98, off offset:39
+; ALIGNED-NEXT:    global_store_byte v[16:17], v12, off offset:37
+; ALIGNED-NEXT:    global_store_byte v[16:17], v70, off offset:31
+; ALIGNED-NEXT:    global_store_byte v[16:17], v11, off offset:29
+; ALIGNED-NEXT:    global_store_byte v[16:17], v83, off offset:27
+; ALIGNED-NEXT:    global_store_byte v[16:17], v10, off offset:25
+; ALIGNED-NEXT:    global_store_byte v[16:17], v112, off offset:23
+; ALIGNED-NEXT:    global_store_byte v[16:17], v9, off offset:21
+; ALIGNED-NEXT:    global_store_byte v[16:17], v67, off offset:19
+; ALIGNED-NEXT:    global_store_byte v[16:17], v8, off offset:17
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v7, off offset:14
+; ALIGNED-NEXT:    global_store_byte v[16:17], v7, off offset:12
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v6, off offset:10
+; ALIGNED-NEXT:    global_store_byte v[16:17], v6, off offset:8
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v5, off offset:6
+; ALIGNED-NEXT:    global_store_byte v[16:17], v5, off offset:4
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    global_store_byte_d16_hi v[16:17], v4, off offset:2
+; ALIGNED-NEXT:    global_store_byte v[16:17], v4, off
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v8, 24, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v9, 24, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v6, 8, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v10, 24, v5
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v5, 8, v5
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v11, 24, v4
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v4, 8, v4
+; ALIGNED-NEXT:    global_store_byte v[16:17], v8, off offset:15
+; ALIGNED-NEXT:    global_store_byte v[16:17], v7, off offset:13
+; ALIGNED-NEXT:    global_store_byte v[16:17], v9, off offset:11
+; ALIGNED-NEXT:    global_store_byte v[16:17], v6, off offset:9
+; ALIGNED-NEXT:    global_store_byte v[16:17], v10, off offset:7
+; ALIGNED-NEXT:    global_store_byte v[16:17], v5, off offset:5
+; ALIGNED-NEXT:    global_store_byte v[16:17], v11, off offset:3
+; ALIGNED-NEXT:    global_store_byte v[16:17], v4, off offset:1
+; ALIGNED-NEXT:    s_cbranch_scc0 .LBB6_5
+; ALIGNED-NEXT:  .LBB6_6: ; %Flow10
+; ALIGNED-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_dword v47, off, s[0:3], s32
+; ALIGNED-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:4
+; ALIGNED-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:8
+; ALIGNED-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:12
+; ALIGNED-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:16
+; ALIGNED-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:20
+; ALIGNED-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:24
+; ALIGNED-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    s_setpc_b64 s[30:31]
+;
+; UNROLL3-LABEL: memmove_p1_p1_sz2048:
+; UNROLL3:       ; %bb.0: ; %entry
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    s_mov_b32 s4, exec_lo
+; UNROLL3-NEXT:    v_cmpx_ge_u64_e64 v[2:3], v[0:1]
+; UNROLL3-NEXT:    s_xor_b32 s6, exec_lo, s4
+; UNROLL3-NEXT:    s_cbranch_execz .LBB6_4
+; UNROLL3-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0
+; UNROLL3-NEXT:    .p2align 6
+; UNROLL3-NEXT:  .LBB6_2: ; %memmove_fwd_loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    v_add_co_u32 v12, vcc_lo, v2, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo
+; UNROLL3-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; UNROLL3-NEXT:    s_clause 0x2
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[12:13], off
+; UNROLL3-NEXT:    global_load_dwordx4 v[8:11], v[12:13], off offset:16
+; UNROLL3-NEXT:    global_load_dwordx4 v[12:15], v[12:13], off offset:32
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 48
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, 0
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    global_store_dwordx4 v[16:17], v[4:7], off
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    global_store_dwordx4 v[16:17], v[8:11], off offset:16
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    global_store_dwordx4 v[16:17], v[12:15], off offset:32
+; UNROLL3-NEXT:    s_cmp_lg_u64 s[4:5], 0x7e0
+; UNROLL3-NEXT:    s_cbranch_scc1 .LBB6_2
+; UNROLL3-NEXT:  ; %bb.3: ; %memmove_fwd_residual
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:2016
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2016
+; UNROLL3-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off offset:2032
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:2032
+; UNROLL3-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; UNROLL3-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; UNROLL3-NEXT:  .LBB6_4: ; %Flow7
+; UNROLL3-NEXT:    s_andn2_saveexec_b32 s8, s6
+; UNROLL3-NEXT:    s_cbranch_execz .LBB6_7
+; UNROLL3-NEXT:  ; %bb.5: ; %memmove_bwd_residual
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:2032
+; UNROLL3-NEXT:    s_movk_i32 s6, 0xffd0
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0x7b0
+; UNROLL3-NEXT:    s_mov_b32 s7, -1
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2032
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:2016
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    global_store_dwordx4 v[0:1], v[4:7], off offset:2016
+; UNROLL3-NEXT:    .p2align 6
+; UNROLL3-NEXT:  .LBB6_6: ; %memmove_bwd_loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    v_add_co_u32 v12, vcc_lo, v2, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo
+; UNROLL3-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; UNROLL3-NEXT:    s_clause 0x2
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[12:13], off
+; UNROLL3-NEXT:    global_load_dwordx4 v[8:11], v[12:13], off offset:16
+; UNROLL3-NEXT:    global_load_dwordx4 v[12:15], v[12:13], off offset:32
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 0xffffffd0
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, -1
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    global_store_dwordx4 v[16:17], v[4:7], off
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    global_store_dwordx4 v[16:17], v[8:11], off offset:16
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    global_store_dwordx4 v[16:17], v[12:15], off offset:32
+; UNROLL3-NEXT:    s_cmp_eq_u64 s[4:5], s[6:7]
+; UNROLL3-NEXT:    s_cbranch_scc0 .LBB6_6
+; UNROLL3-NEXT:  .LBB6_7: ; %Flow8
+; UNROLL3-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; UNROLL3-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 1 %dst, ptr addrspace(1) noundef nonnull align 1 %src, i64 2048, i1 false)
+  ret void
+}
+
+define void @memmove_p0_p4_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align 1 readonly %src) {
+; CHECK-LABEL: memmove_p0_p4_sz2048:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s4, exec_lo
+; CHECK-NEXT:    v_cmpx_ge_u64_e64 v[2:3], v[0:1]
+; CHECK-NEXT:    s_xor_b32 s6, exec_lo, s4
+; CHECK-NEXT:    s_cbranch_execz .LBB7_3
+; CHECK-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:  .LBB7_2: ; %memmove_fwd_loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    v_add_co_u32 v96, vcc_lo, v2, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo
+; CHECK-NEXT:    v_add_co_u32 v100, vcc_lo, v0, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo
+; CHECK-NEXT:    s_clause 0xf
+; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[96:97], off offset:240
+; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[96:97], off offset:224
+; CHECK-NEXT:    global_load_dwordx4 v[12:15], v[96:97], off offset:208
+; CHECK-NEXT:    global_load_dwordx4 v[16:19], v[96:97], off offset:192
+; CHECK-NEXT:    global_load_dwordx4 v[20:23], v[96:97], off offset:176
+; CHECK-NEXT:    global_load_dwordx4 v[24:27], v[96:97], off offset:160
+; CHECK-NEXT:    global_load_dwordx4 v[28:31], v[96:97], off offset:144
+; CHECK-NEXT:    global_load_dwordx4 v[32:35], v[96:97], off offset:128
+; CHECK-NEXT:    global_load_dwordx4 v[36:39], v[96:97], off offset:112
+; CHECK-NEXT:    global_load_dwordx4 v[48:51], v[96:97], off offset:96
+; CHECK-NEXT:    global_load_dwordx4 v[52:55], v[96:97], off offset:80
+; CHECK-NEXT:    global_load_dwordx4 v[64:67], v[96:97], off offset:64
+; CHECK-NEXT:    global_load_dwordx4 v[68:71], v[96:97], off offset:48
+; CHECK-NEXT:    global_load_dwordx4 v[80:83], v[96:97], off offset:32
+; CHECK-NEXT:    global_load_dwordx4 v[84:87], v[96:97], off offset:16
+; CHECK-NEXT:    global_load_dwordx4 v[96:99], v[96:97], off
+; CHECK-NEXT:    s_add_u32 s4, s4, 0x100
+; CHECK-NEXT:    s_addc_u32 s5, s5, 0
+; CHECK-NEXT:    s_waitcnt vmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[4:7] offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(14)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[8:11] offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(13)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[12:15] offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(12)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[16:19] offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(11)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[20:23] offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(10)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[24:27] offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(9)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[28:31] offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(8)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[32:35] offset:128
+; CHECK-NEXT:    s_waitcnt vmcnt(7)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[36:39] offset:112
+; CHECK-NEXT:    s_waitcnt vmcnt(6)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[48:51] offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(5)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[52:55] offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(4)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[64:67] offset:64
+; CHECK-NEXT:    s_waitcnt vmcnt(3)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[68:71] offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(2)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[80:83] offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[84:87] offset:16
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[96:99]
+; CHECK-NEXT:    s_cmp_lg_u64 s[4:5], 0x800
+; CHECK-NEXT:    s_cbranch_scc1 .LBB7_2
+; CHECK-NEXT:  .LBB7_3: ; %Flow6
+; CHECK-NEXT:    s_andn2_saveexec_b32 s8, s6
+; CHECK-NEXT:    s_cbranch_execz .LBB7_6
+; CHECK-NEXT:  ; %bb.4: ; %memmove_bwd_loop.preheader
+; CHECK-NEXT:    s_movk_i32 s6, 0xff00
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0x700
+; CHECK-NEXT:    s_mov_b32 s7, -1
+; CHECK-NEXT:  .LBB7_5: ; %memmove_bwd_loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    v_add_co_u32 v96, vcc_lo, v2, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v97, vcc_lo, s5, v3, vcc_lo
+; CHECK-NEXT:    v_add_co_u32 v100, vcc_lo, v0, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo
+; CHECK-NEXT:    s_clause 0xf
+; CHECK-NEXT:    global_load_dwordx4 v[4:7], v[96:97], off offset:240
+; CHECK-NEXT:    global_load_dwordx4 v[8:11], v[96:97], off offset:224
+; CHECK-NEXT:    global_load_dwordx4 v[12:15], v[96:97], off offset:208
+; CHECK-NEXT:    global_load_dwordx4 v[16:19], v[96:97], off offset:192
+; CHECK-NEXT:    global_load_dwordx4 v[20:23], v[96:97], off offset:176
+; CHECK-NEXT:    global_load_dwordx4 v[24:27], v[96:97], off offset:160
+; CHECK-NEXT:    global_load_dwordx4 v[28:31], v[96:97], off offset:144
+; CHECK-NEXT:    global_load_dwordx4 v[32:35], v[96:97], off offset:128
+; CHECK-NEXT:    global_load_dwordx4 v[36:39], v[96:97], off offset:112
+; CHECK-NEXT:    global_load_dwordx4 v[48:51], v[96:97], off offset:96
+; CHECK-NEXT:    global_load_dwordx4 v[52:55], v[96:97], off offset:80
+; CHECK-NEXT:    global_load_dwordx4 v[64:67], v[96:97], off offset:64
+; CHECK-NEXT:    global_load_dwordx4 v[68:71], v[96:97], off offset:48
+; CHECK-NEXT:    global_load_dwordx4 v[80:83], v[96:97], off offset:32
+; CHECK-NEXT:    global_load_dwordx4 v[84:87], v[96:97], off offset:16
+; CHECK-NEXT:    global_load_dwordx4 v[96:99], v[96:97], off
+; CHECK-NEXT:    s_add_u32 s4, s4, 0xffffff00
+; CHECK-NEXT:    s_addc_u32 s5, s5, -1
+; CHECK-NEXT:    s_waitcnt vmcnt(15)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[4:7] offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(14)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[8:11] offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(13)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[12:15] offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(12)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[16:19] offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(11)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[20:23] offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(10)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[24:27] offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(9)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[28:31] offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(8)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[32:35] offset:128
+; CHECK-NEXT:    s_waitcnt vmcnt(7)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[36:39] offset:112
+; CHECK-NEXT:    s_waitcnt vmcnt(6)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[48:51] offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(5)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[52:55] offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(4)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[64:67] offset:64
+; CHECK-NEXT:    s_waitcnt vmcnt(3)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[68:71] offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(2)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[80:83] offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[84:87] offset:16
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[96:99]
+; CHECK-NEXT:    s_cmp_eq_u64 s[4:5], s[6:7]
+; CHECK-NEXT:    s_cbranch_scc0 .LBB7_5
+; CHECK-NEXT:  .LBB7_6: ; %Flow7
+; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+;
+; ALIGNED-LABEL: memmove_p0_p4_sz2048:
+; ALIGNED:       ; %bb.0: ; %entry
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    s_mov_b32 s4, exec_lo
+; ALIGNED-NEXT:    v_cmpx_ge_u64_e64 v[2:3], v[0:1]
+; ALIGNED-NEXT:    s_xor_b32 s6, exec_lo, s4
+; ALIGNED-NEXT:    s_cbranch_execz .LBB7_3
+; ALIGNED-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0
+; ALIGNED-NEXT:  .LBB7_2: ; %memmove_fwd_loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    v_add_co_u32 v4, vcc_lo, v2, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, s5, v3, vcc_lo
+; ALIGNED-NEXT:    v_add_co_u32 v96, vcc_lo, v0, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v97, vcc_lo, s5, v1, vcc_lo
+; ALIGNED-NEXT:    s_clause 0xf
+; ALIGNED-NEXT:    global_load_dwordx4 v[112:115], v[4:5], off offset:240
+; ALIGNED-NEXT:    global_load_dwordx4 v[84:87], v[4:5], off offset:224
+; ALIGNED-NEXT:    global_load_dwordx4 v[80:83], v[4:5], off offset:208
+; ALIGNED-NEXT:    global_load_dwordx4 v[68:71], v[4:5], off offset:192
+; ALIGNED-NEXT:    global_load_dwordx4 v[64:67], v[4:5], off offset:176
+; ALIGNED-NEXT:    global_load_dwordx4 v[52:55], v[4:5], off offset:160
+; ALIGNED-NEXT:    global_load_dwordx4 v[48:51], v[4:5], off offset:144
+; ALIGNED-NEXT:    global_load_dwordx4 v[36:39], v[4:5], off offset:128
+; ALIGNED-NEXT:    global_load_dwordx4 v[32:35], v[4:5], off offset:112
+; ALIGNED-NEXT:    global_load_dwordx4 v[28:31], v[4:5], off offset:96
+; ALIGNED-NEXT:    global_load_dwordx4 v[24:27], v[4:5], off offset:80
+; ALIGNED-NEXT:    global_load_dwordx4 v[20:23], v[4:5], off offset:64
+; ALIGNED-NEXT:    global_load_dwordx4 v[16:19], v[4:5], off offset:48
+; ALIGNED-NEXT:    global_load_dwordx4 v[12:15], v[4:5], off offset:32
+; ALIGNED-NEXT:    global_load_dwordx4 v[8:11], v[4:5], off offset:16
+; ALIGNED-NEXT:    global_load_dwordx4 v[4:7], v[4:5], off
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0x100
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, 0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(15)
+; ALIGNED-NEXT:    buffer_store_dword v114, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_store_dword v115, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    buffer_store_dword v113, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_store_dword v112, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 24, v114
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 8, v114
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v114 offset:250
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v115 offset:254
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 24, v115
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:252
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 8, v115
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:248
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v113
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v113 offset:246
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 8, v113
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:244
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v112 offset:242
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:240
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v112
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(14)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v86
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v86
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:251
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 24, v87
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:249
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 8, v87
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:255
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 24, v85
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:253
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 8, v85
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:247
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v84
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:245
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 8, v84
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:243
+; ALIGNED-NEXT:    s_waitcnt vmcnt(13)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v82
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:241
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v82
+; ALIGNED-NEXT:    buffer_store_dword v86, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_store_dword v87, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    buffer_store_dword v85, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_store_dword v84, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v86 offset:234
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v87 offset:238
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:236
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:232
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v85 offset:230
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:228
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v84 offset:226
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:224
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 24, v83
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v83
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v81
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v81
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:235
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v80
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:233
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v80
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:239
+; ALIGNED-NEXT:    s_waitcnt vmcnt(12)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 24, v70
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:237
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 8, v70
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:231
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 24, v71
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:229
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 8, v71
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:227
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v69
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:225
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 8, v69
+; ALIGNED-NEXT:    buffer_store_dword v82, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_store_dword v83, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    buffer_store_dword v81, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_store_dword v80, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v82 offset:218
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v83 offset:222
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v83 offset:220
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v82 offset:216
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v81 offset:214
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v81 offset:212
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v80 offset:210
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v80 offset:208
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v68
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(11)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v66
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v66
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:219
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v67
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:217
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v67
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:223
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 24, v65
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:221
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v65
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:215
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v64
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:213
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v64
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:211
+; ALIGNED-NEXT:    s_waitcnt vmcnt(10)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v54
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:209
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v54
+; ALIGNED-NEXT:    buffer_store_dword v70, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_store_dword v71, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    buffer_store_dword v69, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_store_dword v68, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v70 offset:202
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v71 offset:206
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:204
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:200
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v69 offset:198
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:196
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v68 offset:194
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v68 offset:192
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v52
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 8, v52
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:203
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 24, v53
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 24, v55
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 8, v55
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:201
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 8, v53
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:207
+; ALIGNED-NEXT:    s_waitcnt vmcnt(9)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 24, v50
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:205
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 8, v50
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:199
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v51
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:197
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 8, v51
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v80 offset:195
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v49
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v81 offset:193
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v49
+; ALIGNED-NEXT:    buffer_store_dword v66, off, s[0:3], s32 offset:232
+; ALIGNED-NEXT:    buffer_store_dword v67, off, s[0:3], s32 offset:236
+; ALIGNED-NEXT:    buffer_store_dword v65, off, s[0:3], s32 offset:228
+; ALIGNED-NEXT:    buffer_store_dword v64, off, s[0:3], s32 offset:224
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v66 offset:186
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v67 offset:190
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v67 offset:188
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v66 offset:184
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v65 offset:182
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v65 offset:180
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v64 offset:178
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v64 offset:176
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 24, v48
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 8, v48
+; ALIGNED-NEXT:    s_waitcnt vmcnt(8)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 24, v38
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 8, v38
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v82 offset:187
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v39
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v83 offset:185
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v39
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:191
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v37
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:189
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v37
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:183
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 24, v36
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:181
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v36
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:179
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v34
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:177
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v34
+; ALIGNED-NEXT:    buffer_store_dword v55, off, s[0:3], s32 offset:252
+; ALIGNED-NEXT:    buffer_store_dword v54, off, s[0:3], s32 offset:248
+; ALIGNED-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:244
+; ALIGNED-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:240
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v54 offset:170
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v54 offset:168
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v55 offset:174
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v55 offset:172
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v52 offset:162
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v52 offset:160
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v53 offset:166
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v53 offset:164
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 24, v35
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 8, v35
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v54, 24, v33
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v55, 8, v33
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:171
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v32
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:169
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v32
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:163
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v31
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:161
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 8, v31
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:167
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 24, v29
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v68 offset:175
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 24, v30
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:173
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 8, v30
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:165
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 8, v29
+; ALIGNED-NEXT:    buffer_store_dword v50, off, s[0:3], s32 offset:200
+; ALIGNED-NEXT:    buffer_store_dword v51, off, s[0:3], s32 offset:204
+; ALIGNED-NEXT:    buffer_store_dword v49, off, s[0:3], s32 offset:196
+; ALIGNED-NEXT:    buffer_store_dword v48, off, s[0:3], s32 offset:192
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v50 offset:154
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v51 offset:158
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v51 offset:156
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v50 offset:152
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v49 offset:150
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v49 offset:148
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v48 offset:146
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v48 offset:144
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 24, v28
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 8, v28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 24, v26
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 8, v26
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:155
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:153
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:159
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:157
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v80 offset:151
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v81 offset:149
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v64 offset:147
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 24, v22
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v65 offset:145
+; ALIGNED-NEXT:    buffer_store_dword v38, off, s[0:3], s32 offset:216
+; ALIGNED-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:220
+; ALIGNED-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:212
+; ALIGNED-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:208
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v38 offset:138
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v39 offset:142
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v39 offset:140
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v38 offset:136
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v37 offset:134
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v37 offset:132
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v36 offset:130
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v36 offset:128
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v66 offset:139
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v67 offset:137
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v82 offset:143
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v18
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v83 offset:141
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:135
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:133
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:131
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:129
+; ALIGNED-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v34 offset:122
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v35 offset:126
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v35 offset:124
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v34 offset:120
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v33 offset:118
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v33 offset:116
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v32 offset:114
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v32 offset:112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v34, 24, v14
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:123
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:121
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v52 offset:127
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v53 offset:125
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v54 offset:119
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v55 offset:117
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:115
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v10
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:113
+; ALIGNED-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    buffer_store_dword v29, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_store_dword v28, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v30 offset:106
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v31 offset:110
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v31 offset:108
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v30 offset:104
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v29 offset:102
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v29 offset:100
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v28 offset:98
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v28 offset:96
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:111
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 24, v27
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 8, v22
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v35, 8, v14
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v10
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:109
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 8, v6
+; ALIGNED-NEXT:    s_cmp_lg_u64 s[4:5], 0x800
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 8, v27
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v23
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v15
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 24, v11
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:103
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 24, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 24, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 8, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v37, 8, v23
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v38, 24, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v39, 8, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 24, v20
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 8, v20
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 24, v17
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v17
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v32, 24, v16
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v33, 8, v16
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v15
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 24, v13
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 8, v13
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v54, 24, v12
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v55, 8, v12
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 8, v11
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v9
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 8, v9
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v68 offset:107
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 24, v8
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:105
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 8, v8
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 8, v7
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v48 offset:99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 24, v5
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v49 offset:97
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 8, v5
+; ALIGNED-NEXT:    buffer_store_dword v26, off, s[0:3], s32 offset:8
+; ALIGNED-NEXT:    buffer_store_dword v27, off, s[0:3], s32 offset:12
+; ALIGNED-NEXT:    buffer_store_dword v25, off, s[0:3], s32 offset:4
+; ALIGNED-NEXT:    buffer_store_dword v24, off, s[0:3], s32
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v50 offset:91
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 24, v4
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v51 offset:89
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 8, v4
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v26 offset:90
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v27 offset:94
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:95
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v27 offset:92
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:93
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v26 offset:88
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:87
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v25 offset:86
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:85
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v25 offset:84
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v80 offset:83
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v24 offset:82
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v81 offset:81
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v24 offset:80
+; ALIGNED-NEXT:    buffer_store_dword v22, off, s[0:3], s32 offset:24
+; ALIGNED-NEXT:    buffer_store_dword v23, off, s[0:3], s32 offset:28
+; ALIGNED-NEXT:    buffer_store_dword v21, off, s[0:3], s32 offset:20
+; ALIGNED-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:16
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v22 offset:74
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v64 offset:75
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v65 offset:73
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v23 offset:78
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v36 offset:79
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v23 offset:76
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v37 offset:77
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v22 offset:72
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v38 offset:71
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v21 offset:70
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v39 offset:69
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v21 offset:68
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v66 offset:67
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v20 offset:66
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v67 offset:65
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v20 offset:64
+; ALIGNED-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v18 offset:58
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v82 offset:59
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v83 offset:57
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v19 offset:62
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:63
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v19 offset:60
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:61
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v18 offset:56
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:55
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v17 offset:54
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:53
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v17 offset:52
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v32 offset:51
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v16 offset:50
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v33 offset:49
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v16 offset:48
+; ALIGNED-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v14 offset:42
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v34 offset:43
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v35 offset:41
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v15 offset:46
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:47
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v15 offset:44
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:45
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v14 offset:40
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v52 offset:39
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v13 offset:38
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v53 offset:37
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v13 offset:36
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v54 offset:35
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v12 offset:34
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v55 offset:33
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v12 offset:32
+; ALIGNED-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v10 offset:26
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:27
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:25
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v11 offset:30
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v28 offset:31
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v11 offset:28
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v29 offset:29
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v10 offset:24
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v30 offset:23
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v9 offset:22
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v31 offset:21
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v9 offset:20
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v68 offset:19
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v8 offset:18
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:17
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v8 offset:16
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v6 offset:10
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:11
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:9
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v7 offset:14
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:15
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v7 offset:12
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:13
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v6 offset:8
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v48 offset:7
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v5 offset:6
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v49 offset:5
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v5 offset:4
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v50 offset:3
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v4 offset:2
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v51 offset:1
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v4
+; ALIGNED-NEXT:    s_cbranch_scc1 .LBB7_2
+; ALIGNED-NEXT:  .LBB7_3: ; %Flow6
+; ALIGNED-NEXT:    s_andn2_saveexec_b32 s8, s6
+; ALIGNED-NEXT:    s_cbranch_execz .LBB7_6
+; ALIGNED-NEXT:  ; %bb.4: ; %memmove_bwd_loop.preheader
+; ALIGNED-NEXT:    s_movk_i32 s6, 0xff00
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0x700
+; ALIGNED-NEXT:    s_mov_b32 s7, -1
+; ALIGNED-NEXT:  .LBB7_5: ; %memmove_bwd_loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    v_add_co_u32 v4, vcc_lo, v2, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, s5, v3, vcc_lo
+; ALIGNED-NEXT:    v_add_co_u32 v96, vcc_lo, v0, s4
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v97, vcc_lo, s5, v1, vcc_lo
+; ALIGNED-NEXT:    s_clause 0xf
+; ALIGNED-NEXT:    global_load_dwordx4 v[98:101], v[4:5], off offset:240
+; ALIGNED-NEXT:    global_load_dwordx4 v[84:87], v[4:5], off offset:224
+; ALIGNED-NEXT:    global_load_dwordx4 v[80:83], v[4:5], off offset:208
+; ALIGNED-NEXT:    global_load_dwordx4 v[68:71], v[4:5], off offset:192
+; ALIGNED-NEXT:    global_load_dwordx4 v[64:67], v[4:5], off offset:176
+; ALIGNED-NEXT:    global_load_dwordx4 v[52:55], v[4:5], off offset:160
+; ALIGNED-NEXT:    global_load_dwordx4 v[48:51], v[4:5], off offset:144
+; ALIGNED-NEXT:    global_load_dwordx4 v[36:39], v[4:5], off offset:128
+; ALIGNED-NEXT:    global_load_dwordx4 v[32:35], v[4:5], off offset:112
+; ALIGNED-NEXT:    global_load_dwordx4 v[28:31], v[4:5], off offset:96
+; ALIGNED-NEXT:    global_load_dwordx4 v[24:27], v[4:5], off offset:80
+; ALIGNED-NEXT:    global_load_dwordx4 v[20:23], v[4:5], off offset:64
+; ALIGNED-NEXT:    global_load_dwordx4 v[16:19], v[4:5], off offset:48
+; ALIGNED-NEXT:    global_load_dwordx4 v[12:15], v[4:5], off offset:32
+; ALIGNED-NEXT:    global_load_dwordx4 v[8:11], v[4:5], off offset:16
+; ALIGNED-NEXT:    global_load_dwordx4 v[4:7], v[4:5], off
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0xffffff00
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, -1
+; ALIGNED-NEXT:    s_waitcnt vmcnt(15)
+; ALIGNED-NEXT:    buffer_store_dword v100, off, s[0:3], s32 offset:424
+; ALIGNED-NEXT:    buffer_store_dword v101, off, s[0:3], s32 offset:428
+; ALIGNED-NEXT:    buffer_store_dword v99, off, s[0:3], s32 offset:420
+; ALIGNED-NEXT:    buffer_store_dword v98, off, s[0:3], s32 offset:416
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v100 offset:250
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v101 offset:254
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:252
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:248
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v99 offset:246
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:244
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v98 offset:242
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:240
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v100
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v100
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 8, v101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v98
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v98
+; ALIGNED-NEXT:    s_waitcnt vmcnt(14)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v86
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v86
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:251
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v87
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:249
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v87
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:255
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v85
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:253
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 8, v85
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:247
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v84
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:245
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v84
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:243
+; ALIGNED-NEXT:    s_waitcnt vmcnt(13)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v82
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:241
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v82
+; ALIGNED-NEXT:    buffer_store_dword v86, off, s[0:3], s32 offset:440
+; ALIGNED-NEXT:    buffer_store_dword v87, off, s[0:3], s32 offset:444
+; ALIGNED-NEXT:    buffer_store_dword v85, off, s[0:3], s32 offset:436
+; ALIGNED-NEXT:    buffer_store_dword v84, off, s[0:3], s32 offset:432
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v86 offset:234
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v87 offset:238
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:236
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:232
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v85 offset:230
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:228
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v84 offset:226
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:224
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 24, v83
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v83
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v81
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v81
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:235
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v80
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:233
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v80
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:239
+; ALIGNED-NEXT:    s_waitcnt vmcnt(12)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v70
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:237
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v70
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:231
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v71
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:229
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 8, v71
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:227
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v69
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:225
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v69
+; ALIGNED-NEXT:    buffer_store_dword v82, off, s[0:3], s32 offset:392
+; ALIGNED-NEXT:    buffer_store_dword v83, off, s[0:3], s32 offset:396
+; ALIGNED-NEXT:    buffer_store_dword v81, off, s[0:3], s32 offset:388
+; ALIGNED-NEXT:    buffer_store_dword v80, off, s[0:3], s32 offset:384
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v82 offset:218
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v83 offset:222
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v83 offset:220
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v82 offset:216
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v81 offset:214
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v81 offset:212
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v80 offset:210
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v80 offset:208
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v68
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(11)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v66
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v66
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:219
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v67
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:217
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v67
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:223
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 24, v65
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:221
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v65
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:215
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v64
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:213
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v64
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:211
+; ALIGNED-NEXT:    s_waitcnt vmcnt(10)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v54
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:209
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v54
+; ALIGNED-NEXT:    buffer_store_dword v70, off, s[0:3], s32 offset:408
+; ALIGNED-NEXT:    buffer_store_dword v71, off, s[0:3], s32 offset:412
+; ALIGNED-NEXT:    buffer_store_dword v69, off, s[0:3], s32 offset:404
+; ALIGNED-NEXT:    buffer_store_dword v68, off, s[0:3], s32 offset:400
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v70 offset:202
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v71 offset:206
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:204
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:200
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v69 offset:198
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:196
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v68 offset:194
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v68 offset:192
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 8, v55
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 24, v52
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 8, v52
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 24, v55
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:203
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 24, v53
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:201
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v53
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:207
+; ALIGNED-NEXT:    s_waitcnt vmcnt(9)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v50
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:205
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 8, v50
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:199
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v51
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:197
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v51
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v80 offset:195
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v49
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v81 offset:193
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v49
+; ALIGNED-NEXT:    buffer_store_dword v66, off, s[0:3], s32 offset:488
+; ALIGNED-NEXT:    buffer_store_dword v67, off, s[0:3], s32 offset:492
+; ALIGNED-NEXT:    buffer_store_dword v65, off, s[0:3], s32 offset:484
+; ALIGNED-NEXT:    buffer_store_dword v64, off, s[0:3], s32 offset:480
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v66 offset:186
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v67 offset:190
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v67 offset:188
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v66 offset:184
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v65 offset:182
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v65 offset:180
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v64 offset:178
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v64 offset:176
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 24, v48
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 8, v48
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v82 offset:187
+; ALIGNED-NEXT:    s_waitcnt vmcnt(8)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v39
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 24, v38
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 8, v38
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v83 offset:185
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v39
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:191
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v37
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:189
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v37
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:183
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 24, v36
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:181
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v36
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:179
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v34
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:177
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v34
+; ALIGNED-NEXT:    buffer_store_dword v55, off, s[0:3], s32 offset:508
+; ALIGNED-NEXT:    buffer_store_dword v54, off, s[0:3], s32 offset:504
+; ALIGNED-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:500
+; ALIGNED-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:496
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v54 offset:170
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v54 offset:168
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v55 offset:174
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v55 offset:172
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v52 offset:162
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v52 offset:160
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v53 offset:166
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v53 offset:164
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 24, v35
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v54, 8, v35
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v55, 24, v33
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v116, 8, v33
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:171
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v32
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:169
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v32
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:173
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 24, v31
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:163
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 8, v31
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:161
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 24, v29
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v68 offset:175
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 24, v30
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 8, v30
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:167
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:165
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v29
+; ALIGNED-NEXT:    buffer_store_dword v50, off, s[0:3], s32 offset:456
+; ALIGNED-NEXT:    buffer_store_dword v51, off, s[0:3], s32 offset:460
+; ALIGNED-NEXT:    buffer_store_dword v49, off, s[0:3], s32 offset:452
+; ALIGNED-NEXT:    buffer_store_dword v48, off, s[0:3], s32 offset:448
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v50 offset:154
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v51 offset:158
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v51 offset:156
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v50 offset:152
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v49 offset:150
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v49 offset:148
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v48 offset:146
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v48 offset:144
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 24, v28
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 8, v28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v50, 24, v26
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:155
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:153
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:159
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:157
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v80 offset:151
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v81 offset:149
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v64 offset:147
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v65 offset:145
+; ALIGNED-NEXT:    buffer_store_dword v38, off, s[0:3], s32 offset:472
+; ALIGNED-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:476
+; ALIGNED-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:468
+; ALIGNED-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:464
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v38 offset:138
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v39 offset:142
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v39 offset:140
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v38 offset:136
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v37 offset:134
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v37 offset:132
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v36 offset:130
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v36 offset:128
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v82 offset:143
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v82, 24, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v51, 8, v26
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v66 offset:139
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v67 offset:137
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v83 offset:141
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:135
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:133
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:131
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:129
+; ALIGNED-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:296
+; ALIGNED-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:300
+; ALIGNED-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:292
+; ALIGNED-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:288
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v34 offset:122
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v35 offset:126
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v35 offset:124
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v34 offset:120
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v33 offset:118
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v33 offset:116
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v32 offset:114
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v32 offset:112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v34, 24, v14
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:123
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:121
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v53 offset:127
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v54 offset:125
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v55 offset:119
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v116 offset:117
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:115
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v114, 24, v10
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:113
+; ALIGNED-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:312
+; ALIGNED-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:316
+; ALIGNED-NEXT:    buffer_store_dword v29, off, s[0:3], s32 offset:308
+; ALIGNED-NEXT:    buffer_store_dword v28, off, s[0:3], s32 offset:304
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v30 offset:106
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v31 offset:110
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v31 offset:108
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v30 offset:104
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v29 offset:102
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v29 offset:100
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v28 offset:98
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v28 offset:96
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:111
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v69, 24, v6
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v103, 24, v27
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v83, 8, v18
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v35, 8, v14
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v115, 8, v10
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:109
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v70, 8, v6
+; ALIGNED-NEXT:    s_cmp_eq_u64 s[4:5], s[6:7]
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v102, 8, v27
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v101, 24, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v99, 24, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v86, 24, v15
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v28, 24, v11
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:103
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v71, 24, v7
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v100, 8, v25
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v80, 24, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v81, 8, v24
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v64, 24, v22
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v65, 8, v22
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v36, 24, v23
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v37, 8, v23
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v38, 24, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v39, 8, v21
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v66, 24, v20
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v67, 8, v20
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v98, 8, v19
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v84, 24, v17
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v85, 8, v17
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v32, 24, v16
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v33, 8, v16
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v87, 8, v15
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v53, 24, v13
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v54, 8, v13
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v55, 24, v12
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v113, 8, v12
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v29, 8, v11
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v30, 24, v9
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v31, 8, v9
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v52 offset:107
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v52, 24, v8
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v68 offset:105
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v68, 8, v8
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:101
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v112, 8, v7
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v48 offset:99
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v48, 24, v5
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v49 offset:97
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v49, 8, v5
+; ALIGNED-NEXT:    buffer_store_dword v26, off, s[0:3], s32 offset:264
+; ALIGNED-NEXT:    buffer_store_dword v27, off, s[0:3], s32 offset:268
+; ALIGNED-NEXT:    buffer_store_dword v25, off, s[0:3], s32 offset:260
+; ALIGNED-NEXT:    buffer_store_dword v24, off, s[0:3], s32 offset:256
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v26 offset:90
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v27 offset:94
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v27 offset:92
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v26 offset:88
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v25 offset:86
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v25 offset:84
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v24 offset:82
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v24 offset:80
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v24, 24, v4
+; ALIGNED-NEXT:    v_lshrrev_b32_e32 v25, 8, v4
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v50 offset:91
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v51 offset:89
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v103 offset:95
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v102 offset:93
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v101 offset:87
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v100 offset:85
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v80 offset:83
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v81 offset:81
+; ALIGNED-NEXT:    buffer_store_dword v22, off, s[0:3], s32 offset:280
+; ALIGNED-NEXT:    buffer_store_dword v23, off, s[0:3], s32 offset:284
+; ALIGNED-NEXT:    buffer_store_dword v21, off, s[0:3], s32 offset:276
+; ALIGNED-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:272
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v22 offset:74
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v23 offset:78
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v23 offset:76
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v22 offset:72
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v21 offset:70
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v21 offset:68
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v20 offset:66
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v20 offset:64
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v64 offset:75
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v65 offset:73
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v36 offset:79
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v37 offset:77
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v38 offset:71
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v39 offset:69
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v66 offset:67
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v67 offset:65
+; ALIGNED-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:360
+; ALIGNED-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:364
+; ALIGNED-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:356
+; ALIGNED-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:352
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v82 offset:59
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v18 offset:58
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v83 offset:57
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v19 offset:62
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v99 offset:63
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v19 offset:60
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v98 offset:61
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v18 offset:56
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v84 offset:55
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v17 offset:54
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v85 offset:53
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v17 offset:52
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v32 offset:51
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v16 offset:50
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v33 offset:49
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v16 offset:48
+; ALIGNED-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:376
+; ALIGNED-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:380
+; ALIGNED-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:372
+; ALIGNED-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:368
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v14 offset:42
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v34 offset:43
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v35 offset:41
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v15 offset:46
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v86 offset:47
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v15 offset:44
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v87 offset:45
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v14 offset:40
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v53 offset:39
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v13 offset:38
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v54 offset:37
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v13 offset:36
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v55 offset:35
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v12 offset:34
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v113 offset:33
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v12 offset:32
+; ALIGNED-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:328
+; ALIGNED-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:332
+; ALIGNED-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:324
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:320
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v10 offset:26
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v114 offset:27
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v115 offset:25
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v11 offset:30
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v28 offset:31
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v11 offset:28
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v29 offset:29
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v10 offset:24
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v30 offset:23
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v9 offset:22
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v31 offset:21
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v9 offset:20
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v52 offset:19
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v8 offset:18
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v68 offset:17
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v8 offset:16
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:344
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:348
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:340
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:336
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v6 offset:10
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v69 offset:11
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v70 offset:9
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v7 offset:14
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v71 offset:15
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v7 offset:12
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v112 offset:13
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v6 offset:8
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v48 offset:7
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v5 offset:6
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v49 offset:5
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v5 offset:4
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v24 offset:3
+; ALIGNED-NEXT:    flat_store_byte_d16_hi v[96:97], v4 offset:2
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v25 offset:1
+; ALIGNED-NEXT:    flat_store_byte v[96:97], v4
+; ALIGNED-NEXT:    s_cbranch_scc0 .LBB7_5
+; ALIGNED-NEXT:  .LBB7_6: ; %Flow7
+; ALIGNED-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; ALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
+; ALIGNED-NEXT:    s_setpc_b64 s[30:31]
+;
+; UNROLL3-LABEL: memmove_p0_p4_sz2048:
+; UNROLL3:       ; %bb.0: ; %entry
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    s_mov_b32 s4, exec_lo
+; UNROLL3-NEXT:    v_cmpx_ge_u64_e64 v[2:3], v[0:1]
+; UNROLL3-NEXT:    s_xor_b32 s6, exec_lo, s4
+; UNROLL3-NEXT:    s_cbranch_execz .LBB7_4
+; UNROLL3-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0
+; UNROLL3-NEXT:    .p2align 6
+; UNROLL3-NEXT:  .LBB7_2: ; %memmove_fwd_loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    v_add_co_u32 v12, vcc_lo, v2, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo
+; UNROLL3-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; UNROLL3-NEXT:    s_clause 0x2
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[12:13], off offset:16
+; UNROLL3-NEXT:    global_load_dwordx4 v[8:11], v[12:13], off
+; UNROLL3-NEXT:    global_load_dwordx4 v[12:15], v[12:13], off offset:32
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 48
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, 0
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[4:7] offset:16
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[12:15] offset:32
+; UNROLL3-NEXT:    s_cmp_lg_u64 s[4:5], 0x7e0
+; UNROLL3-NEXT:    s_cbranch_scc1 .LBB7_2
+; UNROLL3-NEXT:  ; %bb.3: ; %memmove_fwd_residual
+; UNROLL3-NEXT:    s_clause 0x1
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:2016
+; UNROLL3-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off offset:2032
+; UNROLL3-NEXT:    ; implicit-def: $vgpr2_vgpr3
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:2016
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[8:11] offset:2032
+; UNROLL3-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; UNROLL3-NEXT:  .LBB7_4: ; %Flow4
+; UNROLL3-NEXT:    s_andn2_saveexec_b32 s8, s6
+; UNROLL3-NEXT:    s_cbranch_execz .LBB7_7
+; UNROLL3-NEXT:  ; %bb.5: ; %memmove_bwd_residual
+; UNROLL3-NEXT:    s_clause 0x1
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[2:3], off offset:2032
+; UNROLL3-NEXT:    global_load_dwordx4 v[8:11], v[2:3], off offset:2016
+; UNROLL3-NEXT:    s_movk_i32 s6, 0xffd0
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0x7b0
+; UNROLL3-NEXT:    s_mov_b32 s7, -1
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[4:7] offset:2032
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[8:11] offset:2016
+; UNROLL3-NEXT:    .p2align 6
+; UNROLL3-NEXT:  .LBB7_6: ; %memmove_bwd_loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    v_add_co_u32 v12, vcc_lo, v2, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v13, vcc_lo, s5, v3, vcc_lo
+; UNROLL3-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; UNROLL3-NEXT:    s_clause 0x2
+; UNROLL3-NEXT:    global_load_dwordx4 v[4:7], v[12:13], off offset:16
+; UNROLL3-NEXT:    global_load_dwordx4 v[8:11], v[12:13], off
+; UNROLL3-NEXT:    global_load_dwordx4 v[12:15], v[12:13], off offset:32
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 0xffffffd0
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, -1
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[4:7] offset:16
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[8:11]
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[12:15] offset:32
+; UNROLL3-NEXT:    s_cmp_eq_u64 s[4:5], s[6:7]
+; UNROLL3-NEXT:    s_cbranch_scc0 .LBB7_6
+; UNROLL3-NEXT:  .LBB7_7: ; %Flow5
+; UNROLL3-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; UNROLL3-NEXT:    s_waitcnt lgkmcnt(0)
+; UNROLL3-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memmove.p0.p4.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(4) noundef nonnull align 1 %src, i64 2048, i1 false)
+  ret void
+}
+
+define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
+; CHECK-LABEL: memmove_p5_p5_sz2048:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s4, exec_lo
+; CHECK-NEXT:    v_cmpx_ge_u32_e64 v1, v0
+; CHECK-NEXT:    s_xor_b32 s6, exec_lo, s4
+; CHECK-NEXT:    s_cbranch_execz .LBB8_3
+; CHECK-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0x800
+; CHECK-NEXT:  .LBB8_2: ; %memmove_fwd_loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_clause 0x3e
+; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:252
+; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:248
+; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:244
+; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:240
+; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:236
+; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:232
+; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:228
+; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:224
+; CHECK-NEXT:    buffer_load_dword v10, v1, s[0:3], 0 offen offset:220
+; CHECK-NEXT:    buffer_load_dword v11, v1, s[0:3], 0 offen offset:216
+; CHECK-NEXT:    buffer_load_dword v12, v1, s[0:3], 0 offen offset:212
+; CHECK-NEXT:    buffer_load_dword v13, v1, s[0:3], 0 offen offset:208
+; CHECK-NEXT:    buffer_load_dword v14, v1, s[0:3], 0 offen offset:204
+; CHECK-NEXT:    buffer_load_dword v15, v1, s[0:3], 0 offen offset:200
+; CHECK-NEXT:    buffer_load_dword v16, v1, s[0:3], 0 offen offset:196
+; CHECK-NEXT:    buffer_load_dword v17, v1, s[0:3], 0 offen offset:192
+; CHECK-NEXT:    buffer_load_dword v18, v1, s[0:3], 0 offen offset:188
+; CHECK-NEXT:    buffer_load_dword v19, v1, s[0:3], 0 offen offset:184
+; CHECK-NEXT:    buffer_load_dword v20, v1, s[0:3], 0 offen offset:180
+; CHECK-NEXT:    buffer_load_dword v21, v1, s[0:3], 0 offen offset:176
+; CHECK-NEXT:    buffer_load_dword v22, v1, s[0:3], 0 offen offset:172
+; CHECK-NEXT:    buffer_load_dword v23, v1, s[0:3], 0 offen offset:168
+; CHECK-NEXT:    buffer_load_dword v24, v1, s[0:3], 0 offen offset:164
+; CHECK-NEXT:    buffer_load_dword v25, v1, s[0:3], 0 offen offset:160
+; CHECK-NEXT:    buffer_load_dword v26, v1, s[0:3], 0 offen offset:156
+; CHECK-NEXT:    buffer_load_dword v27, v1, s[0:3], 0 offen offset:152
+; CHECK-NEXT:    buffer_load_dword v28, v1, s[0:3], 0 offen offset:148
+; CHECK-NEXT:    buffer_load_dword v29, v1, s[0:3], 0 offen offset:144
+; CHECK-NEXT:    buffer_load_dword v30, v1, s[0:3], 0 offen offset:140
+; CHECK-NEXT:    buffer_load_dword v31, v1, s[0:3], 0 offen offset:136
+; CHECK-NEXT:    buffer_load_dword v32, v1, s[0:3], 0 offen offset:132
+; CHECK-NEXT:    buffer_load_dword v33, v1, s[0:3], 0 offen offset:128
+; CHECK-NEXT:    buffer_load_dword v34, v1, s[0:3], 0 offen offset:124
+; CHECK-NEXT:    buffer_load_dword v35, v1, s[0:3], 0 offen offset:120
+; CHECK-NEXT:    buffer_load_dword v36, v1, s[0:3], 0 offen offset:116
+; CHECK-NEXT:    buffer_load_dword v37, v1, s[0:3], 0 offen offset:112
+; CHECK-NEXT:    buffer_load_dword v38, v1, s[0:3], 0 offen offset:108
+; CHECK-NEXT:    buffer_load_dword v39, v1, s[0:3], 0 offen offset:104
+; CHECK-NEXT:    buffer_load_dword v48, v1, s[0:3], 0 offen offset:100
+; CHECK-NEXT:    buffer_load_dword v49, v1, s[0:3], 0 offen offset:96
+; CHECK-NEXT:    buffer_load_dword v50, v1, s[0:3], 0 offen offset:92
+; CHECK-NEXT:    buffer_load_dword v51, v1, s[0:3], 0 offen offset:88
+; CHECK-NEXT:    buffer_load_dword v52, v1, s[0:3], 0 offen offset:84
+; CHECK-NEXT:    buffer_load_dword v53, v1, s[0:3], 0 offen offset:80
+; CHECK-NEXT:    buffer_load_dword v54, v1, s[0:3], 0 offen offset:76
+; CHECK-NEXT:    buffer_load_dword v55, v1, s[0:3], 0 offen offset:72
+; CHECK-NEXT:    buffer_load_dword v64, v1, s[0:3], 0 offen offset:68
+; CHECK-NEXT:    buffer_load_dword v65, v1, s[0:3], 0 offen offset:64
+; CHECK-NEXT:    buffer_load_dword v66, v1, s[0:3], 0 offen offset:60
+; CHECK-NEXT:    buffer_load_dword v67, v1, s[0:3], 0 offen offset:56
+; CHECK-NEXT:    buffer_load_dword v68, v1, s[0:3], 0 offen offset:52
+; CHECK-NEXT:    buffer_load_dword v69, v1, s[0:3], 0 offen offset:48
+; CHECK-NEXT:    buffer_load_dword v70, v1, s[0:3], 0 offen offset:44
+; CHECK-NEXT:    buffer_load_dword v71, v1, s[0:3], 0 offen offset:40
+; CHECK-NEXT:    buffer_load_dword v80, v1, s[0:3], 0 offen offset:36
+; CHECK-NEXT:    buffer_load_dword v81, v1, s[0:3], 0 offen offset:32
+; CHECK-NEXT:    buffer_load_dword v82, v1, s[0:3], 0 offen offset:28
+; CHECK-NEXT:    buffer_load_dword v83, v1, s[0:3], 0 offen offset:24
+; CHECK-NEXT:    buffer_load_dword v84, v1, s[0:3], 0 offen offset:20
+; CHECK-NEXT:    buffer_load_dword v85, v1, s[0:3], 0 offen offset:16
+; CHECK-NEXT:    buffer_load_dword v86, v1, s[0:3], 0 offen offset:12
+; CHECK-NEXT:    buffer_load_dword v87, v1, s[0:3], 0 offen offset:8
+; CHECK-NEXT:    buffer_load_dword v96, v1, s[0:3], 0 offen offset:4
+; CHECK-NEXT:    buffer_load_dword v97, v1, s[0:3], 0 offen
+; CHECK-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
+; CHECK-NEXT:    s_add_u32 s4, s4, 0xffffff00
+; CHECK-NEXT:    s_addc_u32 s5, s5, -1
+; CHECK-NEXT:    s_waitcnt vmcnt(62)
+; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:252
+; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:248
+; CHECK-NEXT:    s_waitcnt vmcnt(61)
+; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:244
+; CHECK-NEXT:    s_waitcnt vmcnt(60)
+; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(59)
+; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:236
+; CHECK-NEXT:    s_waitcnt vmcnt(58)
+; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:232
+; CHECK-NEXT:    s_waitcnt vmcnt(57)
+; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:228
+; CHECK-NEXT:    s_waitcnt vmcnt(56)
+; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(55)
+; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:220
+; CHECK-NEXT:    s_waitcnt vmcnt(54)
+; CHECK-NEXT:    buffer_store_dword v11, v0, s[0:3], 0 offen offset:216
+; CHECK-NEXT:    s_waitcnt vmcnt(53)
+; CHECK-NEXT:    buffer_store_dword v12, v0, s[0:3], 0 offen offset:212
+; CHECK-NEXT:    s_waitcnt vmcnt(52)
+; CHECK-NEXT:    buffer_store_dword v13, v0, s[0:3], 0 offen offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(51)
+; CHECK-NEXT:    buffer_store_dword v14, v0, s[0:3], 0 offen offset:204
+; CHECK-NEXT:    s_waitcnt vmcnt(50)
+; CHECK-NEXT:    buffer_store_dword v15, v0, s[0:3], 0 offen offset:200
+; CHECK-NEXT:    s_waitcnt vmcnt(49)
+; CHECK-NEXT:    buffer_store_dword v16, v0, s[0:3], 0 offen offset:196
+; CHECK-NEXT:    s_waitcnt vmcnt(48)
+; CHECK-NEXT:    buffer_store_dword v17, v0, s[0:3], 0 offen offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(47)
+; CHECK-NEXT:    buffer_store_dword v18, v0, s[0:3], 0 offen offset:188
+; CHECK-NEXT:    s_waitcnt vmcnt(46)
+; CHECK-NEXT:    buffer_store_dword v19, v0, s[0:3], 0 offen offset:184
+; CHECK-NEXT:    s_waitcnt vmcnt(45)
+; CHECK-NEXT:    buffer_store_dword v20, v0, s[0:3], 0 offen offset:180
+; CHECK-NEXT:    s_waitcnt vmcnt(44)
+; CHECK-NEXT:    buffer_store_dword v21, v0, s[0:3], 0 offen offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(43)
+; CHECK-NEXT:    buffer_store_dword v22, v0, s[0:3], 0 offen offset:172
+; CHECK-NEXT:    s_waitcnt vmcnt(42)
+; CHECK-NEXT:    buffer_store_dword v23, v0, s[0:3], 0 offen offset:168
+; CHECK-NEXT:    s_waitcnt vmcnt(41)
+; CHECK-NEXT:    buffer_store_dword v24, v0, s[0:3], 0 offen offset:164
+; CHECK-NEXT:    s_waitcnt vmcnt(40)
+; CHECK-NEXT:    buffer_store_dword v25, v0, s[0:3], 0 offen offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(39)
+; CHECK-NEXT:    buffer_store_dword v26, v0, s[0:3], 0 offen offset:156
+; CHECK-NEXT:    s_waitcnt vmcnt(38)
+; CHECK-NEXT:    buffer_store_dword v27, v0, s[0:3], 0 offen offset:152
+; CHECK-NEXT:    s_waitcnt vmcnt(37)
+; CHECK-NEXT:    buffer_store_dword v28, v0, s[0:3], 0 offen offset:148
+; CHECK-NEXT:    s_waitcnt vmcnt(36)
+; CHECK-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(35)
+; CHECK-NEXT:    buffer_store_dword v30, v0, s[0:3], 0 offen offset:140
+; CHECK-NEXT:    s_waitcnt vmcnt(34)
+; CHECK-NEXT:    buffer_store_dword v31, v0, s[0:3], 0 offen offset:136
+; CHECK-NEXT:    s_waitcnt vmcnt(33)
+; CHECK-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:132
+; CHECK-NEXT:    s_waitcnt vmcnt(32)
+; CHECK-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen offset:128
+; CHECK-NEXT:    s_waitcnt vmcnt(31)
+; CHECK-NEXT:    buffer_store_dword v34, v0, s[0:3], 0 offen offset:124
+; CHECK-NEXT:    s_waitcnt vmcnt(30)
+; CHECK-NEXT:    buffer_store_dword v35, v0, s[0:3], 0 offen offset:120
+; CHECK-NEXT:    s_waitcnt vmcnt(29)
+; CHECK-NEXT:    buffer_store_dword v36, v0, s[0:3], 0 offen offset:116
+; CHECK-NEXT:    s_waitcnt vmcnt(28)
+; CHECK-NEXT:    buffer_store_dword v37, v0, s[0:3], 0 offen offset:112
+; CHECK-NEXT:    s_waitcnt vmcnt(27)
+; CHECK-NEXT:    buffer_store_dword v38, v0, s[0:3], 0 offen offset:108
+; CHECK-NEXT:    s_waitcnt vmcnt(26)
+; CHECK-NEXT:    buffer_store_dword v39, v0, s[0:3], 0 offen offset:104
+; CHECK-NEXT:    s_waitcnt vmcnt(25)
+; CHECK-NEXT:    buffer_store_dword v48, v0, s[0:3], 0 offen offset:100
+; CHECK-NEXT:    s_waitcnt vmcnt(24)
+; CHECK-NEXT:    buffer_store_dword v49, v0, s[0:3], 0 offen offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(23)
+; CHECK-NEXT:    buffer_store_dword v50, v0, s[0:3], 0 offen offset:92
+; CHECK-NEXT:    s_waitcnt vmcnt(22)
+; CHECK-NEXT:    buffer_store_dword v51, v0, s[0:3], 0 offen offset:88
+; CHECK-NEXT:    s_waitcnt vmcnt(21)
+; CHECK-NEXT:    buffer_store_dword v52, v0, s[0:3], 0 offen offset:84
+; CHECK-NEXT:    s_waitcnt vmcnt(20)
+; CHECK-NEXT:    buffer_store_dword v53, v0, s[0:3], 0 offen offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(19)
+; CHECK-NEXT:    buffer_store_dword v54, v0, s[0:3], 0 offen offset:76
+; CHECK-NEXT:    s_waitcnt vmcnt(18)
+; CHECK-NEXT:    buffer_store_dword v55, v0, s[0:3], 0 offen offset:72
+; CHECK-NEXT:    s_waitcnt vmcnt(17)
+; CHECK-NEXT:    buffer_store_dword v64, v0, s[0:3], 0 offen offset:68
+; CHECK-NEXT:    s_waitcnt vmcnt(16)
+; CHECK-NEXT:    buffer_store_dword v65, v0, s[0:3], 0 offen offset:64
+; CHECK-NEXT:    s_waitcnt vmcnt(15)
+; CHECK-NEXT:    buffer_store_dword v66, v0, s[0:3], 0 offen offset:60
+; CHECK-NEXT:    s_waitcnt vmcnt(14)
+; CHECK-NEXT:    buffer_store_dword v67, v0, s[0:3], 0 offen offset:56
+; CHECK-NEXT:    s_waitcnt vmcnt(13)
+; CHECK-NEXT:    buffer_store_dword v68, v0, s[0:3], 0 offen offset:52
+; CHECK-NEXT:    s_waitcnt vmcnt(12)
+; CHECK-NEXT:    buffer_store_dword v69, v0, s[0:3], 0 offen offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(11)
+; CHECK-NEXT:    buffer_store_dword v70, v0, s[0:3], 0 offen offset:44
+; CHECK-NEXT:    s_waitcnt vmcnt(10)
+; CHECK-NEXT:    buffer_store_dword v71, v0, s[0:3], 0 offen offset:40
+; CHECK-NEXT:    s_waitcnt vmcnt(9)
+; CHECK-NEXT:    buffer_store_dword v80, v0, s[0:3], 0 offen offset:36
+; CHECK-NEXT:    s_waitcnt vmcnt(8)
+; CHECK-NEXT:    buffer_store_dword v81, v0, s[0:3], 0 offen offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(7)
+; CHECK-NEXT:    buffer_store_dword v82, v0, s[0:3], 0 offen offset:28
+; CHECK-NEXT:    s_waitcnt vmcnt(6)
+; CHECK-NEXT:    buffer_store_dword v83, v0, s[0:3], 0 offen offset:24
+; CHECK-NEXT:    s_waitcnt vmcnt(5)
+; CHECK-NEXT:    buffer_store_dword v84, v0, s[0:3], 0 offen offset:20
+; CHECK-NEXT:    s_waitcnt vmcnt(4)
+; CHECK-NEXT:    buffer_store_dword v85, v0, s[0:3], 0 offen offset:16
+; CHECK-NEXT:    s_waitcnt vmcnt(3)
+; CHECK-NEXT:    buffer_store_dword v86, v0, s[0:3], 0 offen offset:12
+; CHECK-NEXT:    s_waitcnt vmcnt(2)
+; CHECK-NEXT:    buffer_store_dword v87, v0, s[0:3], 0 offen offset:8
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    buffer_store_dword v96, v0, s[0:3], 0 offen offset:4
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    buffer_store_dword v97, v0, s[0:3], 0 offen
+; CHECK-NEXT:    v_add_nc_u32_e32 v0, 0x100, v0
+; CHECK-NEXT:    s_cmp_lg_u64 s[4:5], 0
+; CHECK-NEXT:    s_cbranch_scc1 .LBB8_2
+; CHECK-NEXT:  .LBB8_3: ; %Flow18
+; CHECK-NEXT:    s_andn2_saveexec_b32 s6, s6
+; CHECK-NEXT:    s_cbranch_execz .LBB8_6
+; CHECK-NEXT:  ; %bb.4: ; %memmove_bwd_loop.preheader
+; CHECK-NEXT:    v_add_nc_u32_e32 v0, 0x700, v0
+; CHECK-NEXT:    v_add_nc_u32_e32 v1, 0x700, v1
+; CHECK-NEXT:    s_movk_i32 s4, 0xf800
+; CHECK-NEXT:    s_mov_b32 s5, -1
+; CHECK-NEXT:  .LBB8_5: ; %memmove_bwd_loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_clause 0x3e
+; CHECK-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:252
+; CHECK-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:248
+; CHECK-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:244
+; CHECK-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:240
+; CHECK-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:236
+; CHECK-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:232
+; CHECK-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:228
+; CHECK-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:224
+; CHECK-NEXT:    buffer_load_dword v10, v1, s[0:3], 0 offen offset:220
+; CHECK-NEXT:    buffer_load_dword v11, v1, s[0:3], 0 offen offset:216
+; CHECK-NEXT:    buffer_load_dword v12, v1, s[0:3], 0 offen offset:212
+; CHECK-NEXT:    buffer_load_dword v13, v1, s[0:3], 0 offen offset:208
+; CHECK-NEXT:    buffer_load_dword v14, v1, s[0:3], 0 offen offset:204
+; CHECK-NEXT:    buffer_load_dword v15, v1, s[0:3], 0 offen offset:200
+; CHECK-NEXT:    buffer_load_dword v16, v1, s[0:3], 0 offen offset:196
+; CHECK-NEXT:    buffer_load_dword v17, v1, s[0:3], 0 offen offset:192
+; CHECK-NEXT:    buffer_load_dword v18, v1, s[0:3], 0 offen offset:188
+; CHECK-NEXT:    buffer_load_dword v19, v1, s[0:3], 0 offen offset:184
+; CHECK-NEXT:    buffer_load_dword v20, v1, s[0:3], 0 offen offset:180
+; CHECK-NEXT:    buffer_load_dword v21, v1, s[0:3], 0 offen offset:176
+; CHECK-NEXT:    buffer_load_dword v22, v1, s[0:3], 0 offen offset:172
+; CHECK-NEXT:    buffer_load_dword v23, v1, s[0:3], 0 offen offset:168
+; CHECK-NEXT:    buffer_load_dword v24, v1, s[0:3], 0 offen offset:164
+; CHECK-NEXT:    buffer_load_dword v25, v1, s[0:3], 0 offen offset:160
+; CHECK-NEXT:    buffer_load_dword v26, v1, s[0:3], 0 offen offset:156
+; CHECK-NEXT:    buffer_load_dword v27, v1, s[0:3], 0 offen offset:152
+; CHECK-NEXT:    buffer_load_dword v28, v1, s[0:3], 0 offen offset:148
+; CHECK-NEXT:    buffer_load_dword v29, v1, s[0:3], 0 offen offset:144
+; CHECK-NEXT:    buffer_load_dword v30, v1, s[0:3], 0 offen offset:140
+; CHECK-NEXT:    buffer_load_dword v31, v1, s[0:3], 0 offen offset:136
+; CHECK-NEXT:    buffer_load_dword v32, v1, s[0:3], 0 offen offset:132
+; CHECK-NEXT:    buffer_load_dword v33, v1, s[0:3], 0 offen offset:128
+; CHECK-NEXT:    buffer_load_dword v34, v1, s[0:3], 0 offen offset:124
+; CHECK-NEXT:    buffer_load_dword v35, v1, s[0:3], 0 offen offset:120
+; CHECK-NEXT:    buffer_load_dword v36, v1, s[0:3], 0 offen offset:116
+; CHECK-NEXT:    buffer_load_dword v37, v1, s[0:3], 0 offen offset:112
+; CHECK-NEXT:    buffer_load_dword v38, v1, s[0:3], 0 offen offset:108
+; CHECK-NEXT:    buffer_load_dword v39, v1, s[0:3], 0 offen offset:104
+; CHECK-NEXT:    buffer_load_dword v48, v1, s[0:3], 0 offen offset:100
+; CHECK-NEXT:    buffer_load_dword v49, v1, s[0:3], 0 offen offset:96
+; CHECK-NEXT:    buffer_load_dword v50, v1, s[0:3], 0 offen offset:92
+; CHECK-NEXT:    buffer_load_dword v51, v1, s[0:3], 0 offen offset:88
+; CHECK-NEXT:    buffer_load_dword v52, v1, s[0:3], 0 offen offset:84
+; CHECK-NEXT:    buffer_load_dword v53, v1, s[0:3], 0 offen offset:80
+; CHECK-NEXT:    buffer_load_dword v54, v1, s[0:3], 0 offen offset:76
+; CHECK-NEXT:    buffer_load_dword v55, v1, s[0:3], 0 offen offset:72
+; CHECK-NEXT:    buffer_load_dword v64, v1, s[0:3], 0 offen offset:68
+; CHECK-NEXT:    buffer_load_dword v65, v1, s[0:3], 0 offen offset:64
+; CHECK-NEXT:    buffer_load_dword v66, v1, s[0:3], 0 offen offset:60
+; CHECK-NEXT:    buffer_load_dword v67, v1, s[0:3], 0 offen offset:56
+; CHECK-NEXT:    buffer_load_dword v68, v1, s[0:3], 0 offen offset:52
+; CHECK-NEXT:    buffer_load_dword v69, v1, s[0:3], 0 offen offset:48
+; CHECK-NEXT:    buffer_load_dword v70, v1, s[0:3], 0 offen offset:44
+; CHECK-NEXT:    buffer_load_dword v71, v1, s[0:3], 0 offen offset:40
+; CHECK-NEXT:    buffer_load_dword v80, v1, s[0:3], 0 offen offset:36
+; CHECK-NEXT:    buffer_load_dword v81, v1, s[0:3], 0 offen offset:32
+; CHECK-NEXT:    buffer_load_dword v82, v1, s[0:3], 0 offen offset:28
+; CHECK-NEXT:    buffer_load_dword v83, v1, s[0:3], 0 offen offset:24
+; CHECK-NEXT:    buffer_load_dword v84, v1, s[0:3], 0 offen offset:20
+; CHECK-NEXT:    buffer_load_dword v85, v1, s[0:3], 0 offen offset:16
+; CHECK-NEXT:    buffer_load_dword v86, v1, s[0:3], 0 offen offset:12
+; CHECK-NEXT:    buffer_load_dword v87, v1, s[0:3], 0 offen offset:8
+; CHECK-NEXT:    buffer_load_dword v96, v1, s[0:3], 0 offen offset:4
+; CHECK-NEXT:    buffer_load_dword v97, v1, s[0:3], 0 offen
+; CHECK-NEXT:    v_add_nc_u32_e32 v1, 0xffffff00, v1
+; CHECK-NEXT:    s_add_u32 s4, s4, 0x100
+; CHECK-NEXT:    s_addc_u32 s5, s5, 0
+; CHECK-NEXT:    s_waitcnt vmcnt(62)
+; CHECK-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:252
+; CHECK-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:248
+; CHECK-NEXT:    s_waitcnt vmcnt(61)
+; CHECK-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:244
+; CHECK-NEXT:    s_waitcnt vmcnt(60)
+; CHECK-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(59)
+; CHECK-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:236
+; CHECK-NEXT:    s_waitcnt vmcnt(58)
+; CHECK-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:232
+; CHECK-NEXT:    s_waitcnt vmcnt(57)
+; CHECK-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:228
+; CHECK-NEXT:    s_waitcnt vmcnt(56)
+; CHECK-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(55)
+; CHECK-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:220
+; CHECK-NEXT:    s_waitcnt vmcnt(54)
+; CHECK-NEXT:    buffer_store_dword v11, v0, s[0:3], 0 offen offset:216
+; CHECK-NEXT:    s_waitcnt vmcnt(53)
+; CHECK-NEXT:    buffer_store_dword v12, v0, s[0:3], 0 offen offset:212
+; CHECK-NEXT:    s_waitcnt vmcnt(52)
+; CHECK-NEXT:    buffer_store_dword v13, v0, s[0:3], 0 offen offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(51)
+; CHECK-NEXT:    buffer_store_dword v14, v0, s[0:3], 0 offen offset:204
+; CHECK-NEXT:    s_waitcnt vmcnt(50)
+; CHECK-NEXT:    buffer_store_dword v15, v0, s[0:3], 0 offen offset:200
+; CHECK-NEXT:    s_waitcnt vmcnt(49)
+; CHECK-NEXT:    buffer_store_dword v16, v0, s[0:3], 0 offen offset:196
+; CHECK-NEXT:    s_waitcnt vmcnt(48)
+; CHECK-NEXT:    buffer_store_dword v17, v0, s[0:3], 0 offen offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(47)
+; CHECK-NEXT:    buffer_store_dword v18, v0, s[0:3], 0 offen offset:188
+; CHECK-NEXT:    s_waitcnt vmcnt(46)
+; CHECK-NEXT:    buffer_store_dword v19, v0, s[0:3], 0 offen offset:184
+; CHECK-NEXT:    s_waitcnt vmcnt(45)
+; CHECK-NEXT:    buffer_store_dword v20, v0, s[0:3], 0 offen offset:180
+; CHECK-NEXT:    s_waitcnt vmcnt(44)
+; CHECK-NEXT:    buffer_store_dword v21, v0, s[0:3], 0 offen offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(43)
+; CHECK-NEXT:    buffer_store_dword v22, v0, s[0:3], 0 offen offset:172
+; CHECK-NEXT:    s_waitcnt vmcnt(42)
+; CHECK-NEXT:    buffer_store_dword v23, v0, s[0:3], 0 offen offset:168
+; CHECK-NEXT:    s_waitcnt vmcnt(41)
+; CHECK-NEXT:    buffer_store_dword v24, v0, s[0:3], 0 offen offset:164
+; CHECK-NEXT:    s_waitcnt vmcnt(40)
+; CHECK-NEXT:    buffer_store_dword v25, v0, s[0:3], 0 offen offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(39)
+; CHECK-NEXT:    buffer_store_dword v26, v0, s[0:3], 0 offen offset:156
+; CHECK-NEXT:    s_waitcnt vmcnt(38)
+; CHECK-NEXT:    buffer_store_dword v27, v0, s[0:3], 0 offen offset:152
+; CHECK-NEXT:    s_waitcnt vmcnt(37)
+; CHECK-NEXT:    buffer_store_dword v28, v0, s[0:3], 0 offen offset:148
+; CHECK-NEXT:    s_waitcnt vmcnt(36)
+; CHECK-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(35)
+; CHECK-NEXT:    buffer_store_dword v30, v0, s[0:3], 0 offen offset:140
+; CHECK-NEXT:    s_waitcnt vmcnt(34)
+; CHECK-NEXT:    buffer_store_dword v31, v0, s[0:3], 0 offen offset:136
+; CHECK-NEXT:    s_waitcnt vmcnt(33)
+; CHECK-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:132
+; CHECK-NEXT:    s_waitcnt vmcnt(32)
+; CHECK-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen offset:128
+; CHECK-NEXT:    s_waitcnt vmcnt(31)
+; CHECK-NEXT:    buffer_store_dword v34, v0, s[0:3], 0 offen offset:124
+; CHECK-NEXT:    s_waitcnt vmcnt(30)
+; CHECK-NEXT:    buffer_store_dword v35, v0, s[0:3], 0 offen offset:120
+; CHECK-NEXT:    s_waitcnt vmcnt(29)
+; CHECK-NEXT:    buffer_store_dword v36, v0, s[0:3], 0 offen offset:116
+; CHECK-NEXT:    s_waitcnt vmcnt(28)
+; CHECK-NEXT:    buffer_store_dword v37, v0, s[0:3], 0 offen offset:112
+; CHECK-NEXT:    s_waitcnt vmcnt(27)
+; CHECK-NEXT:    buffer_store_dword v38, v0, s[0:3], 0 offen offset:108
+; CHECK-NEXT:    s_waitcnt vmcnt(26)
+; CHECK-NEXT:    buffer_store_dword v39, v0, s[0:3], 0 offen offset:104
+; CHECK-NEXT:    s_waitcnt vmcnt(25)
+; CHECK-NEXT:    buffer_store_dword v48, v0, s[0:3], 0 offen offset:100
+; CHECK-NEXT:    s_waitcnt vmcnt(24)
+; CHECK-NEXT:    buffer_store_dword v49, v0, s[0:3], 0 offen offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(23)
+; CHECK-NEXT:    buffer_store_dword v50, v0, s[0:3], 0 offen offset:92
+; CHECK-NEXT:    s_waitcnt vmcnt(22)
+; CHECK-NEXT:    buffer_store_dword v51, v0, s[0:3], 0 offen offset:88
+; CHECK-NEXT:    s_waitcnt vmcnt(21)
+; CHECK-NEXT:    buffer_store_dword v52, v0, s[0:3], 0 offen offset:84
+; CHECK-NEXT:    s_waitcnt vmcnt(20)
+; CHECK-NEXT:    buffer_store_dword v53, v0, s[0:3], 0 offen offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(19)
+; CHECK-NEXT:    buffer_store_dword v54, v0, s[0:3], 0 offen offset:76
+; CHECK-NEXT:    s_waitcnt vmcnt(18)
+; CHECK-NEXT:    buffer_store_dword v55, v0, s[0:3], 0 offen offset:72
+; CHECK-NEXT:    s_waitcnt vmcnt(17)
+; CHECK-NEXT:    buffer_store_dword v64, v0, s[0:3], 0 offen offset:68
+; CHECK-NEXT:    s_waitcnt vmcnt(16)
+; CHECK-NEXT:    buffer_store_dword v65, v0, s[0:3], 0 offen offset:64
+; CHECK-NEXT:    s_waitcnt vmcnt(15)
+; CHECK-NEXT:    buffer_store_dword v66, v0, s[0:3], 0 offen offset:60
+; CHECK-NEXT:    s_waitcnt vmcnt(14)
+; CHECK-NEXT:    buffer_store_dword v67, v0, s[0:3], 0 offen offset:56
+; CHECK-NEXT:    s_waitcnt vmcnt(13)
+; CHECK-NEXT:    buffer_store_dword v68, v0, s[0:3], 0 offen offset:52
+; CHECK-NEXT:    s_waitcnt vmcnt(12)
+; CHECK-NEXT:    buffer_store_dword v69, v0, s[0:3], 0 offen offset:48
+; CHECK-NEXT:    s_waitcnt vmcnt(11)
+; CHECK-NEXT:    buffer_store_dword v70, v0, s[0:3], 0 offen offset:44
+; CHECK-NEXT:    s_waitcnt vmcnt(10)
+; CHECK-NEXT:    buffer_store_dword v71, v0, s[0:3], 0 offen offset:40
+; CHECK-NEXT:    s_waitcnt vmcnt(9)
+; CHECK-NEXT:    buffer_store_dword v80, v0, s[0:3], 0 offen offset:36
+; CHECK-NEXT:    s_waitcnt vmcnt(8)
+; CHECK-NEXT:    buffer_store_dword v81, v0, s[0:3], 0 offen offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(7)
+; CHECK-NEXT:    buffer_store_dword v82, v0, s[0:3], 0 offen offset:28
+; CHECK-NEXT:    s_waitcnt vmcnt(6)
+; CHECK-NEXT:    buffer_store_dword v83, v0, s[0:3], 0 offen offset:24
+; CHECK-NEXT:    s_waitcnt vmcnt(5)
+; CHECK-NEXT:    buffer_store_dword v84, v0, s[0:3], 0 offen offset:20
+; CHECK-NEXT:    s_waitcnt vmcnt(4)
+; CHECK-NEXT:    buffer_store_dword v85, v0, s[0:3], 0 offen offset:16
+; CHECK-NEXT:    s_waitcnt vmcnt(3)
+; CHECK-NEXT:    buffer_store_dword v86, v0, s[0:3], 0 offen offset:12
+; CHECK-NEXT:    s_waitcnt vmcnt(2)
+; CHECK-NEXT:    buffer_store_dword v87, v0, s[0:3], 0 offen offset:8
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    buffer_store_dword v96, v0, s[0:3], 0 offen offset:4
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    buffer_store_dword v97, v0, s[0:3], 0 offen
+; CHECK-NEXT:    v_add_nc_u32_e32 v0, 0xffffff00, v0
+; CHECK-NEXT:    s_cmp_eq_u64 s[4:5], 0
+; CHECK-NEXT:    s_cbranch_scc0 .LBB8_5
+; CHECK-NEXT:  .LBB8_6: ; %Flow19
+; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s6
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+;
+; ALIGNED-LABEL: memmove_p5_p5_sz2048:
+; ALIGNED:       ; %bb.0: ; %entry
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v105, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v107, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v108, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v109, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v111, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v120, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v121, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v122, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v127, off, s[0:3], s32 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_mov_b32 s4, exec_lo
+; ALIGNED-NEXT:    v_cmpx_ge_u32_e64 v1, v0
+; ALIGNED-NEXT:    s_xor_b32 s6, exec_lo, s4
+; ALIGNED-NEXT:    s_cbranch_execz .LBB8_3
+; ALIGNED-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0x800
+; ALIGNED-NEXT:  .LBB8_2: ; %memmove_fwd_loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:255
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0xffffff00
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, -1
+; ALIGNED-NEXT:    s_cmp_lg_u64 s[4:5], 0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:254
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:708 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:253
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:704 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:252
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:700 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:251
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:696 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:250
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:692 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:249
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:688 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:248
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:684 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x3e
+; ALIGNED-NEXT:    buffer_load_ubyte v123, v1, s[0:3], 0 offen offset:247
+; ALIGNED-NEXT:    buffer_load_ubyte v122, v1, s[0:3], 0 offen offset:246
+; ALIGNED-NEXT:    buffer_load_ubyte v121, v1, s[0:3], 0 offen offset:245
+; ALIGNED-NEXT:    buffer_load_ubyte v120, v1, s[0:3], 0 offen offset:244
+; ALIGNED-NEXT:    buffer_load_ubyte v111, v1, s[0:3], 0 offen offset:243
+; ALIGNED-NEXT:    buffer_load_ubyte v110, v1, s[0:3], 0 offen offset:242
+; ALIGNED-NEXT:    buffer_load_ubyte v109, v1, s[0:3], 0 offen offset:241
+; ALIGNED-NEXT:    buffer_load_ubyte v108, v1, s[0:3], 0 offen offset:240
+; ALIGNED-NEXT:    buffer_load_ubyte v107, v1, s[0:3], 0 offen offset:239
+; ALIGNED-NEXT:    buffer_load_ubyte v106, v1, s[0:3], 0 offen offset:238
+; ALIGNED-NEXT:    buffer_load_ubyte v105, v1, s[0:3], 0 offen offset:237
+; ALIGNED-NEXT:    buffer_load_ubyte v104, v1, s[0:3], 0 offen offset:236
+; ALIGNED-NEXT:    buffer_load_ubyte v95, v1, s[0:3], 0 offen offset:235
+; ALIGNED-NEXT:    buffer_load_ubyte v94, v1, s[0:3], 0 offen offset:234
+; ALIGNED-NEXT:    buffer_load_ubyte v93, v1, s[0:3], 0 offen offset:233
+; ALIGNED-NEXT:    buffer_load_ubyte v92, v1, s[0:3], 0 offen offset:232
+; ALIGNED-NEXT:    buffer_load_ubyte v91, v1, s[0:3], 0 offen offset:231
+; ALIGNED-NEXT:    buffer_load_ubyte v90, v1, s[0:3], 0 offen offset:230
+; ALIGNED-NEXT:    buffer_load_ubyte v89, v1, s[0:3], 0 offen offset:229
+; ALIGNED-NEXT:    buffer_load_ubyte v88, v1, s[0:3], 0 offen offset:228
+; ALIGNED-NEXT:    buffer_load_ubyte v79, v1, s[0:3], 0 offen offset:227
+; ALIGNED-NEXT:    buffer_load_ubyte v78, v1, s[0:3], 0 offen offset:226
+; ALIGNED-NEXT:    buffer_load_ubyte v77, v1, s[0:3], 0 offen offset:225
+; ALIGNED-NEXT:    buffer_load_ubyte v76, v1, s[0:3], 0 offen offset:224
+; ALIGNED-NEXT:    buffer_load_ubyte v75, v1, s[0:3], 0 offen offset:223
+; ALIGNED-NEXT:    buffer_load_ubyte v74, v1, s[0:3], 0 offen offset:222
+; ALIGNED-NEXT:    buffer_load_ubyte v73, v1, s[0:3], 0 offen offset:221
+; ALIGNED-NEXT:    buffer_load_ubyte v72, v1, s[0:3], 0 offen offset:220
+; ALIGNED-NEXT:    buffer_load_ubyte v63, v1, s[0:3], 0 offen offset:219
+; ALIGNED-NEXT:    buffer_load_ubyte v62, v1, s[0:3], 0 offen offset:218
+; ALIGNED-NEXT:    buffer_load_ubyte v61, v1, s[0:3], 0 offen offset:217
+; ALIGNED-NEXT:    buffer_load_ubyte v60, v1, s[0:3], 0 offen offset:216
+; ALIGNED-NEXT:    buffer_load_ubyte v59, v1, s[0:3], 0 offen offset:215
+; ALIGNED-NEXT:    buffer_load_ubyte v58, v1, s[0:3], 0 offen offset:214
+; ALIGNED-NEXT:    buffer_load_ubyte v57, v1, s[0:3], 0 offen offset:213
+; ALIGNED-NEXT:    buffer_load_ubyte v56, v1, s[0:3], 0 offen offset:212
+; ALIGNED-NEXT:    buffer_load_ubyte v47, v1, s[0:3], 0 offen offset:211
+; ALIGNED-NEXT:    buffer_load_ubyte v46, v1, s[0:3], 0 offen offset:210
+; ALIGNED-NEXT:    buffer_load_ubyte v45, v1, s[0:3], 0 offen offset:209
+; ALIGNED-NEXT:    buffer_load_ubyte v44, v1, s[0:3], 0 offen offset:208
+; ALIGNED-NEXT:    buffer_load_ubyte v43, v1, s[0:3], 0 offen offset:207
+; ALIGNED-NEXT:    buffer_load_ubyte v42, v1, s[0:3], 0 offen offset:206
+; ALIGNED-NEXT:    buffer_load_ubyte v41, v1, s[0:3], 0 offen offset:205
+; ALIGNED-NEXT:    buffer_load_ubyte v40, v1, s[0:3], 0 offen offset:204
+; ALIGNED-NEXT:    buffer_load_ubyte v119, v1, s[0:3], 0 offen offset:203
+; ALIGNED-NEXT:    buffer_load_ubyte v118, v1, s[0:3], 0 offen offset:202
+; ALIGNED-NEXT:    buffer_load_ubyte v117, v1, s[0:3], 0 offen offset:201
+; ALIGNED-NEXT:    buffer_load_ubyte v116, v1, s[0:3], 0 offen offset:200
+; ALIGNED-NEXT:    buffer_load_ubyte v115, v1, s[0:3], 0 offen offset:199
+; ALIGNED-NEXT:    buffer_load_ubyte v114, v1, s[0:3], 0 offen offset:198
+; ALIGNED-NEXT:    buffer_load_ubyte v113, v1, s[0:3], 0 offen offset:197
+; ALIGNED-NEXT:    buffer_load_ubyte v112, v1, s[0:3], 0 offen offset:196
+; ALIGNED-NEXT:    buffer_load_ubyte v103, v1, s[0:3], 0 offen offset:195
+; ALIGNED-NEXT:    buffer_load_ubyte v102, v1, s[0:3], 0 offen offset:194
+; ALIGNED-NEXT:    buffer_load_ubyte v101, v1, s[0:3], 0 offen offset:193
+; ALIGNED-NEXT:    buffer_load_ubyte v100, v1, s[0:3], 0 offen offset:192
+; ALIGNED-NEXT:    buffer_load_ubyte v99, v1, s[0:3], 0 offen offset:191
+; ALIGNED-NEXT:    buffer_load_ubyte v98, v1, s[0:3], 0 offen offset:190
+; ALIGNED-NEXT:    buffer_load_ubyte v97, v1, s[0:3], 0 offen offset:189
+; ALIGNED-NEXT:    buffer_load_ubyte v96, v1, s[0:3], 0 offen offset:188
+; ALIGNED-NEXT:    buffer_load_ubyte v87, v1, s[0:3], 0 offen offset:187
+; ALIGNED-NEXT:    buffer_load_ubyte v86, v1, s[0:3], 0 offen offset:186
+; ALIGNED-NEXT:    buffer_load_ubyte v85, v1, s[0:3], 0 offen offset:185
+; ALIGNED-NEXT:    s_clause 0x3a
+; ALIGNED-NEXT:    buffer_load_ubyte v84, v1, s[0:3], 0 offen offset:184
+; ALIGNED-NEXT:    buffer_load_ubyte v83, v1, s[0:3], 0 offen offset:183
+; ALIGNED-NEXT:    buffer_load_ubyte v82, v1, s[0:3], 0 offen offset:182
+; ALIGNED-NEXT:    buffer_load_ubyte v81, v1, s[0:3], 0 offen offset:181
+; ALIGNED-NEXT:    buffer_load_ubyte v80, v1, s[0:3], 0 offen offset:180
+; ALIGNED-NEXT:    buffer_load_ubyte v71, v1, s[0:3], 0 offen offset:179
+; ALIGNED-NEXT:    buffer_load_ubyte v70, v1, s[0:3], 0 offen offset:178
+; ALIGNED-NEXT:    buffer_load_ubyte v69, v1, s[0:3], 0 offen offset:177
+; ALIGNED-NEXT:    buffer_load_ubyte v68, v1, s[0:3], 0 offen offset:176
+; ALIGNED-NEXT:    buffer_load_ubyte v67, v1, s[0:3], 0 offen offset:175
+; ALIGNED-NEXT:    buffer_load_ubyte v66, v1, s[0:3], 0 offen offset:174
+; ALIGNED-NEXT:    buffer_load_ubyte v65, v1, s[0:3], 0 offen offset:173
+; ALIGNED-NEXT:    buffer_load_ubyte v64, v1, s[0:3], 0 offen offset:172
+; ALIGNED-NEXT:    buffer_load_ubyte v55, v1, s[0:3], 0 offen offset:171
+; ALIGNED-NEXT:    buffer_load_ubyte v54, v1, s[0:3], 0 offen offset:170
+; ALIGNED-NEXT:    buffer_load_ubyte v53, v1, s[0:3], 0 offen offset:169
+; ALIGNED-NEXT:    buffer_load_ubyte v52, v1, s[0:3], 0 offen offset:168
+; ALIGNED-NEXT:    buffer_load_ubyte v51, v1, s[0:3], 0 offen offset:167
+; ALIGNED-NEXT:    buffer_load_ubyte v50, v1, s[0:3], 0 offen offset:166
+; ALIGNED-NEXT:    buffer_load_ubyte v49, v1, s[0:3], 0 offen offset:165
+; ALIGNED-NEXT:    buffer_load_ubyte v48, v1, s[0:3], 0 offen offset:164
+; ALIGNED-NEXT:    buffer_load_ubyte v39, v1, s[0:3], 0 offen offset:163
+; ALIGNED-NEXT:    buffer_load_ubyte v38, v1, s[0:3], 0 offen offset:162
+; ALIGNED-NEXT:    buffer_load_ubyte v37, v1, s[0:3], 0 offen offset:161
+; ALIGNED-NEXT:    buffer_load_ubyte v36, v1, s[0:3], 0 offen offset:160
+; ALIGNED-NEXT:    buffer_load_ubyte v35, v1, s[0:3], 0 offen offset:159
+; ALIGNED-NEXT:    buffer_load_ubyte v34, v1, s[0:3], 0 offen offset:158
+; ALIGNED-NEXT:    buffer_load_ubyte v33, v1, s[0:3], 0 offen offset:157
+; ALIGNED-NEXT:    buffer_load_ubyte v32, v1, s[0:3], 0 offen offset:156
+; ALIGNED-NEXT:    buffer_load_ubyte v31, v1, s[0:3], 0 offen offset:155
+; ALIGNED-NEXT:    buffer_load_ubyte v30, v1, s[0:3], 0 offen offset:154
+; ALIGNED-NEXT:    buffer_load_ubyte v29, v1, s[0:3], 0 offen offset:153
+; ALIGNED-NEXT:    buffer_load_ubyte v28, v1, s[0:3], 0 offen offset:152
+; ALIGNED-NEXT:    buffer_load_ubyte v27, v1, s[0:3], 0 offen offset:151
+; ALIGNED-NEXT:    buffer_load_ubyte v26, v1, s[0:3], 0 offen offset:150
+; ALIGNED-NEXT:    buffer_load_ubyte v25, v1, s[0:3], 0 offen offset:149
+; ALIGNED-NEXT:    buffer_load_ubyte v24, v1, s[0:3], 0 offen offset:148
+; ALIGNED-NEXT:    buffer_load_ubyte v23, v1, s[0:3], 0 offen offset:147
+; ALIGNED-NEXT:    buffer_load_ubyte v22, v1, s[0:3], 0 offen offset:146
+; ALIGNED-NEXT:    buffer_load_ubyte v21, v1, s[0:3], 0 offen offset:145
+; ALIGNED-NEXT:    buffer_load_ubyte v20, v1, s[0:3], 0 offen offset:144
+; ALIGNED-NEXT:    buffer_load_ubyte v19, v1, s[0:3], 0 offen offset:143
+; ALIGNED-NEXT:    buffer_load_ubyte v18, v1, s[0:3], 0 offen offset:142
+; ALIGNED-NEXT:    buffer_load_ubyte v17, v1, s[0:3], 0 offen offset:141
+; ALIGNED-NEXT:    buffer_load_ubyte v16, v1, s[0:3], 0 offen offset:140
+; ALIGNED-NEXT:    buffer_load_ubyte v15, v1, s[0:3], 0 offen offset:139
+; ALIGNED-NEXT:    buffer_load_ubyte v14, v1, s[0:3], 0 offen offset:138
+; ALIGNED-NEXT:    buffer_load_ubyte v13, v1, s[0:3], 0 offen offset:137
+; ALIGNED-NEXT:    buffer_load_ubyte v12, v1, s[0:3], 0 offen offset:136
+; ALIGNED-NEXT:    buffer_load_ubyte v11, v1, s[0:3], 0 offen offset:135
+; ALIGNED-NEXT:    buffer_load_ubyte v10, v1, s[0:3], 0 offen offset:134
+; ALIGNED-NEXT:    buffer_load_ubyte v9, v1, s[0:3], 0 offen offset:133
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v1, s[0:3], 0 offen offset:132
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v1, s[0:3], 0 offen offset:131
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v1, s[0:3], 0 offen offset:130
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v1, s[0:3], 0 offen offset:129
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v1, s[0:3], 0 offen offset:128
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v1, s[0:3], 0 offen offset:127
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:126
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:680 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v127, v1, s[0:3], 0 offen offset:125
+; ALIGNED-NEXT:    buffer_load_ubyte v126, v1, s[0:3], 0 offen offset:124
+; ALIGNED-NEXT:    buffer_load_ubyte v125, v1, s[0:3], 0 offen offset:123
+; ALIGNED-NEXT:    buffer_load_ubyte v124, v1, s[0:3], 0 offen offset:122
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:121
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:676 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:120
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:672 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:119
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:668 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:118
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:664 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:117
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:116
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:656 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:115
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:652 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:114
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:648 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:113
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:644 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:640 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:111
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:636 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:110
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:632 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:109
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:628 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:108
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:624 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:107
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:620 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:106
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:616 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:105
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:612 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:104
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:608 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:103
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:604 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:102
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:600 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:101
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:596 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:100
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:592 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:99
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:588 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:98
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:584 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:97
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:580 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:96
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:576 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:95
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:94
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:93
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:92
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:91
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:90
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:89
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:88
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:87
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:86
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:85
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:84
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:83
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:524 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:82
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:81
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:80
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:512 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:79
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:78
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:77
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:76
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:75
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:74
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:488 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:73
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:72
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:71
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:70
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:69
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:468 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:67
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:66
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:65
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:64
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:63
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:62
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:61
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:60
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:59
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:58
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:57
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:56
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:55
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:54
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:53
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:52
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:51
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:50
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:49
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:48
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:47
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:46
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:45
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:44
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:43
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:42
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:41
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:40
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:39
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:38
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:37
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:36
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:35
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:34
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:33
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:32
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:31
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:30
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:29
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:27
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:26
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:25
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:24
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:23
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:22
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:21
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:20
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:19
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:18
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:17
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:16
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:15
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:14
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:13
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:12
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:11
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:10
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:9
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:8
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:7
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:6
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:5
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:4
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:3
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:2
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:1
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen
+; ALIGNED-NEXT:    v_add_nc_u32_e32 v1, 0x100, v1
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:255
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:708 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:254
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:704 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:253
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:700 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:252
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:696 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:251
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:692 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:250
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:688 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:249
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:684 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:248
+; ALIGNED-NEXT:    buffer_store_byte v123, v0, s[0:3], 0 offen offset:247
+; ALIGNED-NEXT:    buffer_store_byte v122, v0, s[0:3], 0 offen offset:246
+; ALIGNED-NEXT:    buffer_store_byte v121, v0, s[0:3], 0 offen offset:245
+; ALIGNED-NEXT:    buffer_store_byte v120, v0, s[0:3], 0 offen offset:244
+; ALIGNED-NEXT:    buffer_store_byte v111, v0, s[0:3], 0 offen offset:243
+; ALIGNED-NEXT:    buffer_store_byte v110, v0, s[0:3], 0 offen offset:242
+; ALIGNED-NEXT:    buffer_store_byte v109, v0, s[0:3], 0 offen offset:241
+; ALIGNED-NEXT:    buffer_store_byte v108, v0, s[0:3], 0 offen offset:240
+; ALIGNED-NEXT:    buffer_store_byte v107, v0, s[0:3], 0 offen offset:239
+; ALIGNED-NEXT:    buffer_store_byte v106, v0, s[0:3], 0 offen offset:238
+; ALIGNED-NEXT:    buffer_store_byte v105, v0, s[0:3], 0 offen offset:237
+; ALIGNED-NEXT:    buffer_store_byte v104, v0, s[0:3], 0 offen offset:236
+; ALIGNED-NEXT:    buffer_store_byte v95, v0, s[0:3], 0 offen offset:235
+; ALIGNED-NEXT:    buffer_store_byte v94, v0, s[0:3], 0 offen offset:234
+; ALIGNED-NEXT:    buffer_store_byte v93, v0, s[0:3], 0 offen offset:233
+; ALIGNED-NEXT:    buffer_store_byte v92, v0, s[0:3], 0 offen offset:232
+; ALIGNED-NEXT:    buffer_store_byte v91, v0, s[0:3], 0 offen offset:231
+; ALIGNED-NEXT:    buffer_store_byte v90, v0, s[0:3], 0 offen offset:230
+; ALIGNED-NEXT:    buffer_store_byte v89, v0, s[0:3], 0 offen offset:229
+; ALIGNED-NEXT:    buffer_store_byte v88, v0, s[0:3], 0 offen offset:228
+; ALIGNED-NEXT:    buffer_store_byte v79, v0, s[0:3], 0 offen offset:227
+; ALIGNED-NEXT:    buffer_store_byte v78, v0, s[0:3], 0 offen offset:226
+; ALIGNED-NEXT:    buffer_store_byte v77, v0, s[0:3], 0 offen offset:225
+; ALIGNED-NEXT:    buffer_store_byte v76, v0, s[0:3], 0 offen offset:224
+; ALIGNED-NEXT:    buffer_store_byte v75, v0, s[0:3], 0 offen offset:223
+; ALIGNED-NEXT:    buffer_store_byte v74, v0, s[0:3], 0 offen offset:222
+; ALIGNED-NEXT:    buffer_store_byte v73, v0, s[0:3], 0 offen offset:221
+; ALIGNED-NEXT:    buffer_store_byte v72, v0, s[0:3], 0 offen offset:220
+; ALIGNED-NEXT:    buffer_store_byte v63, v0, s[0:3], 0 offen offset:219
+; ALIGNED-NEXT:    buffer_store_byte v62, v0, s[0:3], 0 offen offset:218
+; ALIGNED-NEXT:    buffer_store_byte v61, v0, s[0:3], 0 offen offset:217
+; ALIGNED-NEXT:    buffer_store_byte v60, v0, s[0:3], 0 offen offset:216
+; ALIGNED-NEXT:    buffer_store_byte v59, v0, s[0:3], 0 offen offset:215
+; ALIGNED-NEXT:    buffer_store_byte v58, v0, s[0:3], 0 offen offset:214
+; ALIGNED-NEXT:    buffer_store_byte v57, v0, s[0:3], 0 offen offset:213
+; ALIGNED-NEXT:    buffer_store_byte v56, v0, s[0:3], 0 offen offset:212
+; ALIGNED-NEXT:    buffer_store_byte v47, v0, s[0:3], 0 offen offset:211
+; ALIGNED-NEXT:    buffer_store_byte v46, v0, s[0:3], 0 offen offset:210
+; ALIGNED-NEXT:    buffer_store_byte v45, v0, s[0:3], 0 offen offset:209
+; ALIGNED-NEXT:    buffer_store_byte v44, v0, s[0:3], 0 offen offset:208
+; ALIGNED-NEXT:    buffer_store_byte v43, v0, s[0:3], 0 offen offset:207
+; ALIGNED-NEXT:    buffer_store_byte v42, v0, s[0:3], 0 offen offset:206
+; ALIGNED-NEXT:    buffer_store_byte v41, v0, s[0:3], 0 offen offset:205
+; ALIGNED-NEXT:    buffer_store_byte v40, v0, s[0:3], 0 offen offset:204
+; ALIGNED-NEXT:    buffer_store_byte v119, v0, s[0:3], 0 offen offset:203
+; ALIGNED-NEXT:    buffer_store_byte v118, v0, s[0:3], 0 offen offset:202
+; ALIGNED-NEXT:    buffer_store_byte v117, v0, s[0:3], 0 offen offset:201
+; ALIGNED-NEXT:    buffer_store_byte v116, v0, s[0:3], 0 offen offset:200
+; ALIGNED-NEXT:    buffer_store_byte v115, v0, s[0:3], 0 offen offset:199
+; ALIGNED-NEXT:    buffer_store_byte v114, v0, s[0:3], 0 offen offset:198
+; ALIGNED-NEXT:    buffer_store_byte v113, v0, s[0:3], 0 offen offset:197
+; ALIGNED-NEXT:    buffer_store_byte v112, v0, s[0:3], 0 offen offset:196
+; ALIGNED-NEXT:    buffer_store_byte v103, v0, s[0:3], 0 offen offset:195
+; ALIGNED-NEXT:    buffer_store_byte v102, v0, s[0:3], 0 offen offset:194
+; ALIGNED-NEXT:    buffer_store_byte v101, v0, s[0:3], 0 offen offset:193
+; ALIGNED-NEXT:    buffer_store_byte v100, v0, s[0:3], 0 offen offset:192
+; ALIGNED-NEXT:    buffer_store_byte v99, v0, s[0:3], 0 offen offset:191
+; ALIGNED-NEXT:    buffer_store_byte v98, v0, s[0:3], 0 offen offset:190
+; ALIGNED-NEXT:    buffer_store_byte v97, v0, s[0:3], 0 offen offset:189
+; ALIGNED-NEXT:    buffer_store_byte v96, v0, s[0:3], 0 offen offset:188
+; ALIGNED-NEXT:    buffer_store_byte v87, v0, s[0:3], 0 offen offset:187
+; ALIGNED-NEXT:    buffer_store_byte v86, v0, s[0:3], 0 offen offset:186
+; ALIGNED-NEXT:    buffer_store_byte v85, v0, s[0:3], 0 offen offset:185
+; ALIGNED-NEXT:    buffer_store_byte v84, v0, s[0:3], 0 offen offset:184
+; ALIGNED-NEXT:    buffer_store_byte v83, v0, s[0:3], 0 offen offset:183
+; ALIGNED-NEXT:    buffer_store_byte v82, v0, s[0:3], 0 offen offset:182
+; ALIGNED-NEXT:    buffer_store_byte v81, v0, s[0:3], 0 offen offset:181
+; ALIGNED-NEXT:    buffer_store_byte v80, v0, s[0:3], 0 offen offset:180
+; ALIGNED-NEXT:    buffer_store_byte v71, v0, s[0:3], 0 offen offset:179
+; ALIGNED-NEXT:    buffer_store_byte v70, v0, s[0:3], 0 offen offset:178
+; ALIGNED-NEXT:    buffer_store_byte v69, v0, s[0:3], 0 offen offset:177
+; ALIGNED-NEXT:    buffer_store_byte v68, v0, s[0:3], 0 offen offset:176
+; ALIGNED-NEXT:    buffer_store_byte v67, v0, s[0:3], 0 offen offset:175
+; ALIGNED-NEXT:    buffer_store_byte v66, v0, s[0:3], 0 offen offset:174
+; ALIGNED-NEXT:    buffer_store_byte v65, v0, s[0:3], 0 offen offset:173
+; ALIGNED-NEXT:    buffer_store_byte v64, v0, s[0:3], 0 offen offset:172
+; ALIGNED-NEXT:    buffer_store_byte v55, v0, s[0:3], 0 offen offset:171
+; ALIGNED-NEXT:    buffer_store_byte v54, v0, s[0:3], 0 offen offset:170
+; ALIGNED-NEXT:    buffer_store_byte v53, v0, s[0:3], 0 offen offset:169
+; ALIGNED-NEXT:    buffer_store_byte v52, v0, s[0:3], 0 offen offset:168
+; ALIGNED-NEXT:    buffer_store_byte v51, v0, s[0:3], 0 offen offset:167
+; ALIGNED-NEXT:    buffer_store_byte v50, v0, s[0:3], 0 offen offset:166
+; ALIGNED-NEXT:    buffer_store_byte v49, v0, s[0:3], 0 offen offset:165
+; ALIGNED-NEXT:    buffer_store_byte v48, v0, s[0:3], 0 offen offset:164
+; ALIGNED-NEXT:    buffer_store_byte v39, v0, s[0:3], 0 offen offset:163
+; ALIGNED-NEXT:    buffer_store_byte v38, v0, s[0:3], 0 offen offset:162
+; ALIGNED-NEXT:    buffer_store_byte v37, v0, s[0:3], 0 offen offset:161
+; ALIGNED-NEXT:    buffer_store_byte v36, v0, s[0:3], 0 offen offset:160
+; ALIGNED-NEXT:    buffer_store_byte v35, v0, s[0:3], 0 offen offset:159
+; ALIGNED-NEXT:    buffer_store_byte v34, v0, s[0:3], 0 offen offset:158
+; ALIGNED-NEXT:    buffer_store_byte v33, v0, s[0:3], 0 offen offset:157
+; ALIGNED-NEXT:    buffer_store_byte v32, v0, s[0:3], 0 offen offset:156
+; ALIGNED-NEXT:    buffer_store_byte v31, v0, s[0:3], 0 offen offset:155
+; ALIGNED-NEXT:    buffer_store_byte v30, v0, s[0:3], 0 offen offset:154
+; ALIGNED-NEXT:    buffer_store_byte v29, v0, s[0:3], 0 offen offset:153
+; ALIGNED-NEXT:    buffer_store_byte v28, v0, s[0:3], 0 offen offset:152
+; ALIGNED-NEXT:    buffer_store_byte v27, v0, s[0:3], 0 offen offset:151
+; ALIGNED-NEXT:    buffer_store_byte v26, v0, s[0:3], 0 offen offset:150
+; ALIGNED-NEXT:    buffer_store_byte v25, v0, s[0:3], 0 offen offset:149
+; ALIGNED-NEXT:    buffer_store_byte v24, v0, s[0:3], 0 offen offset:148
+; ALIGNED-NEXT:    buffer_store_byte v23, v0, s[0:3], 0 offen offset:147
+; ALIGNED-NEXT:    buffer_store_byte v22, v0, s[0:3], 0 offen offset:146
+; ALIGNED-NEXT:    buffer_store_byte v21, v0, s[0:3], 0 offen offset:145
+; ALIGNED-NEXT:    buffer_store_byte v20, v0, s[0:3], 0 offen offset:144
+; ALIGNED-NEXT:    buffer_store_byte v19, v0, s[0:3], 0 offen offset:143
+; ALIGNED-NEXT:    buffer_store_byte v18, v0, s[0:3], 0 offen offset:142
+; ALIGNED-NEXT:    buffer_store_byte v17, v0, s[0:3], 0 offen offset:141
+; ALIGNED-NEXT:    buffer_store_byte v16, v0, s[0:3], 0 offen offset:140
+; ALIGNED-NEXT:    buffer_store_byte v15, v0, s[0:3], 0 offen offset:139
+; ALIGNED-NEXT:    buffer_store_byte v14, v0, s[0:3], 0 offen offset:138
+; ALIGNED-NEXT:    buffer_store_byte v13, v0, s[0:3], 0 offen offset:137
+; ALIGNED-NEXT:    buffer_store_byte v12, v0, s[0:3], 0 offen offset:136
+; ALIGNED-NEXT:    buffer_store_byte v11, v0, s[0:3], 0 offen offset:135
+; ALIGNED-NEXT:    buffer_store_byte v10, v0, s[0:3], 0 offen offset:134
+; ALIGNED-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:133
+; ALIGNED-NEXT:    buffer_store_byte v8, v0, s[0:3], 0 offen offset:132
+; ALIGNED-NEXT:    buffer_store_byte v7, v0, s[0:3], 0 offen offset:131
+; ALIGNED-NEXT:    buffer_store_byte v6, v0, s[0:3], 0 offen offset:130
+; ALIGNED-NEXT:    buffer_store_byte v5, v0, s[0:3], 0 offen offset:129
+; ALIGNED-NEXT:    buffer_store_byte v4, v0, s[0:3], 0 offen offset:128
+; ALIGNED-NEXT:    buffer_store_byte v3, v0, s[0:3], 0 offen offset:127
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:680 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:126
+; ALIGNED-NEXT:    buffer_store_byte v127, v0, s[0:3], 0 offen offset:125
+; ALIGNED-NEXT:    buffer_store_byte v126, v0, s[0:3], 0 offen offset:124
+; ALIGNED-NEXT:    buffer_store_byte v125, v0, s[0:3], 0 offen offset:123
+; ALIGNED-NEXT:    buffer_store_byte v124, v0, s[0:3], 0 offen offset:122
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:676 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:121
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:672 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:120
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:668 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:119
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:664 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:118
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:117
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:656 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:116
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:652 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:115
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:648 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:114
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:644 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:113
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:640 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:112
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:636 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:111
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:632 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:110
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:628 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:109
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:624 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:108
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:620 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:107
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:616 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:106
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:612 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:105
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:608 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:104
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:604 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:103
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:600 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:102
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:596 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:101
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:592 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:100
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:588 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:99
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:584 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:98
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:580 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:97
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:576 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:96
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:95
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:94
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:93
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:92
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:91
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:90
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:89
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:88
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:87
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:86
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:85
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:84
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:524 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:83
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:82
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:81
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:512 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:80
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:79
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:78
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:77
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:76
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:75
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:488 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:74
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:73
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:480 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:72
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:71
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:70
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:468 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:69
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:68
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:67
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:66
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:65
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:64
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:63
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:62
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:61
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:60
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:59
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:58
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:57
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:56
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:55
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:54
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:53
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:52
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:51
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:50
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:49
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:48
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:47
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:46
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:45
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:44
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:43
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:42
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:41
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:40
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:39
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:38
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:37
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:36
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:35
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:34
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:33
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:32
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:31
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:30
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:29
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:28
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:27
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:26
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:25
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:24
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:23
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:22
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:21
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:20
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:19
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:18
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:17
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:16
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:15
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:14
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:13
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:12
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:11
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:10
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:9
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:8
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:7
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:6
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:5
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:4
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:3
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:2
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:1
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen
+; ALIGNED-NEXT:    v_add_nc_u32_e32 v0, 0x100, v0
+; ALIGNED-NEXT:    s_cbranch_scc1 .LBB8_2
+; ALIGNED-NEXT:  .LBB8_3: ; %Flow18
+; ALIGNED-NEXT:    s_andn2_saveexec_b32 s6, s6
+; ALIGNED-NEXT:    s_cbranch_execz .LBB8_6
+; ALIGNED-NEXT:  ; %bb.4: ; %memmove_bwd_loop.preheader
+; ALIGNED-NEXT:    v_add_nc_u32_e32 v0, 0x700, v0
+; ALIGNED-NEXT:    v_add_nc_u32_e32 v1, 0x700, v1
+; ALIGNED-NEXT:    s_movk_i32 s4, 0xf800
+; ALIGNED-NEXT:    s_mov_b32 s5, -1
+; ALIGNED-NEXT:  .LBB8_5: ; %memmove_bwd_loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:255
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0x100
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, 0
+; ALIGNED-NEXT:    s_cmp_eq_u64 s[4:5], 0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:254
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:708 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:253
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:704 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:252
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:700 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:251
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:696 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:250
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:692 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:249
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:688 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:248
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:684 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:247
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:680 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:246
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:676 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:245
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:672 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:244
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:668 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:243
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:664 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:242
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:241
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:656 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:240
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:652 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:239
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:648 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:238
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:644 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:237
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:640 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:236
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:636 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:235
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:632 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:234
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:628 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:233
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:624 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:232
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:620 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:231
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:616 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:230
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:612 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:229
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:608 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:228
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:604 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:227
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:600 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:226
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:596 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:225
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:592 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:224
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:588 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:223
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:584 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:222
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:580 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:221
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:576 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:220
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:219
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:218
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:217
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:216
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:215
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:214
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:213
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:212
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:211
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:210
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:209
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:208
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:524 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:207
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:206
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:205
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:512 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:204
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:203
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:202
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:201
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:200
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x3e
+; ALIGNED-NEXT:    buffer_load_ubyte v115, v1, s[0:3], 0 offen offset:199
+; ALIGNED-NEXT:    buffer_load_ubyte v114, v1, s[0:3], 0 offen offset:198
+; ALIGNED-NEXT:    buffer_load_ubyte v113, v1, s[0:3], 0 offen offset:197
+; ALIGNED-NEXT:    buffer_load_ubyte v112, v1, s[0:3], 0 offen offset:196
+; ALIGNED-NEXT:    buffer_load_ubyte v103, v1, s[0:3], 0 offen offset:195
+; ALIGNED-NEXT:    buffer_load_ubyte v102, v1, s[0:3], 0 offen offset:194
+; ALIGNED-NEXT:    buffer_load_ubyte v101, v1, s[0:3], 0 offen offset:193
+; ALIGNED-NEXT:    buffer_load_ubyte v100, v1, s[0:3], 0 offen offset:192
+; ALIGNED-NEXT:    buffer_load_ubyte v99, v1, s[0:3], 0 offen offset:191
+; ALIGNED-NEXT:    buffer_load_ubyte v98, v1, s[0:3], 0 offen offset:190
+; ALIGNED-NEXT:    buffer_load_ubyte v97, v1, s[0:3], 0 offen offset:189
+; ALIGNED-NEXT:    buffer_load_ubyte v96, v1, s[0:3], 0 offen offset:188
+; ALIGNED-NEXT:    buffer_load_ubyte v87, v1, s[0:3], 0 offen offset:187
+; ALIGNED-NEXT:    buffer_load_ubyte v86, v1, s[0:3], 0 offen offset:186
+; ALIGNED-NEXT:    buffer_load_ubyte v85, v1, s[0:3], 0 offen offset:185
+; ALIGNED-NEXT:    buffer_load_ubyte v84, v1, s[0:3], 0 offen offset:184
+; ALIGNED-NEXT:    buffer_load_ubyte v83, v1, s[0:3], 0 offen offset:183
+; ALIGNED-NEXT:    buffer_load_ubyte v82, v1, s[0:3], 0 offen offset:182
+; ALIGNED-NEXT:    buffer_load_ubyte v81, v1, s[0:3], 0 offen offset:181
+; ALIGNED-NEXT:    buffer_load_ubyte v80, v1, s[0:3], 0 offen offset:180
+; ALIGNED-NEXT:    buffer_load_ubyte v71, v1, s[0:3], 0 offen offset:179
+; ALIGNED-NEXT:    buffer_load_ubyte v70, v1, s[0:3], 0 offen offset:178
+; ALIGNED-NEXT:    buffer_load_ubyte v69, v1, s[0:3], 0 offen offset:177
+; ALIGNED-NEXT:    buffer_load_ubyte v68, v1, s[0:3], 0 offen offset:176
+; ALIGNED-NEXT:    buffer_load_ubyte v67, v1, s[0:3], 0 offen offset:175
+; ALIGNED-NEXT:    buffer_load_ubyte v66, v1, s[0:3], 0 offen offset:174
+; ALIGNED-NEXT:    buffer_load_ubyte v65, v1, s[0:3], 0 offen offset:173
+; ALIGNED-NEXT:    buffer_load_ubyte v64, v1, s[0:3], 0 offen offset:172
+; ALIGNED-NEXT:    buffer_load_ubyte v55, v1, s[0:3], 0 offen offset:171
+; ALIGNED-NEXT:    buffer_load_ubyte v54, v1, s[0:3], 0 offen offset:170
+; ALIGNED-NEXT:    buffer_load_ubyte v53, v1, s[0:3], 0 offen offset:169
+; ALIGNED-NEXT:    buffer_load_ubyte v52, v1, s[0:3], 0 offen offset:168
+; ALIGNED-NEXT:    buffer_load_ubyte v51, v1, s[0:3], 0 offen offset:167
+; ALIGNED-NEXT:    buffer_load_ubyte v50, v1, s[0:3], 0 offen offset:166
+; ALIGNED-NEXT:    buffer_load_ubyte v49, v1, s[0:3], 0 offen offset:165
+; ALIGNED-NEXT:    buffer_load_ubyte v48, v1, s[0:3], 0 offen offset:164
+; ALIGNED-NEXT:    buffer_load_ubyte v39, v1, s[0:3], 0 offen offset:163
+; ALIGNED-NEXT:    buffer_load_ubyte v38, v1, s[0:3], 0 offen offset:162
+; ALIGNED-NEXT:    buffer_load_ubyte v37, v1, s[0:3], 0 offen offset:161
+; ALIGNED-NEXT:    buffer_load_ubyte v36, v1, s[0:3], 0 offen offset:160
+; ALIGNED-NEXT:    buffer_load_ubyte v35, v1, s[0:3], 0 offen offset:159
+; ALIGNED-NEXT:    buffer_load_ubyte v34, v1, s[0:3], 0 offen offset:158
+; ALIGNED-NEXT:    buffer_load_ubyte v33, v1, s[0:3], 0 offen offset:157
+; ALIGNED-NEXT:    buffer_load_ubyte v32, v1, s[0:3], 0 offen offset:156
+; ALIGNED-NEXT:    buffer_load_ubyte v31, v1, s[0:3], 0 offen offset:155
+; ALIGNED-NEXT:    buffer_load_ubyte v30, v1, s[0:3], 0 offen offset:154
+; ALIGNED-NEXT:    buffer_load_ubyte v29, v1, s[0:3], 0 offen offset:153
+; ALIGNED-NEXT:    buffer_load_ubyte v28, v1, s[0:3], 0 offen offset:152
+; ALIGNED-NEXT:    buffer_load_ubyte v27, v1, s[0:3], 0 offen offset:151
+; ALIGNED-NEXT:    buffer_load_ubyte v26, v1, s[0:3], 0 offen offset:150
+; ALIGNED-NEXT:    buffer_load_ubyte v25, v1, s[0:3], 0 offen offset:149
+; ALIGNED-NEXT:    buffer_load_ubyte v24, v1, s[0:3], 0 offen offset:148
+; ALIGNED-NEXT:    buffer_load_ubyte v23, v1, s[0:3], 0 offen offset:147
+; ALIGNED-NEXT:    buffer_load_ubyte v22, v1, s[0:3], 0 offen offset:146
+; ALIGNED-NEXT:    buffer_load_ubyte v21, v1, s[0:3], 0 offen offset:145
+; ALIGNED-NEXT:    buffer_load_ubyte v20, v1, s[0:3], 0 offen offset:144
+; ALIGNED-NEXT:    buffer_load_ubyte v19, v1, s[0:3], 0 offen offset:143
+; ALIGNED-NEXT:    buffer_load_ubyte v18, v1, s[0:3], 0 offen offset:142
+; ALIGNED-NEXT:    buffer_load_ubyte v17, v1, s[0:3], 0 offen offset:141
+; ALIGNED-NEXT:    buffer_load_ubyte v16, v1, s[0:3], 0 offen offset:140
+; ALIGNED-NEXT:    buffer_load_ubyte v15, v1, s[0:3], 0 offen offset:139
+; ALIGNED-NEXT:    buffer_load_ubyte v14, v1, s[0:3], 0 offen offset:138
+; ALIGNED-NEXT:    buffer_load_ubyte v13, v1, s[0:3], 0 offen offset:137
+; ALIGNED-NEXT:    s_clause 0xa
+; ALIGNED-NEXT:    buffer_load_ubyte v12, v1, s[0:3], 0 offen offset:136
+; ALIGNED-NEXT:    buffer_load_ubyte v11, v1, s[0:3], 0 offen offset:135
+; ALIGNED-NEXT:    buffer_load_ubyte v10, v1, s[0:3], 0 offen offset:134
+; ALIGNED-NEXT:    buffer_load_ubyte v9, v1, s[0:3], 0 offen offset:133
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v1, s[0:3], 0 offen offset:132
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v1, s[0:3], 0 offen offset:131
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v1, s[0:3], 0 offen offset:130
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v1, s[0:3], 0 offen offset:129
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v1, s[0:3], 0 offen offset:128
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v1, s[0:3], 0 offen offset:127
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:126
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:488 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x34
+; ALIGNED-NEXT:    buffer_load_ubyte v127, v1, s[0:3], 0 offen offset:125
+; ALIGNED-NEXT:    buffer_load_ubyte v126, v1, s[0:3], 0 offen offset:124
+; ALIGNED-NEXT:    buffer_load_ubyte v125, v1, s[0:3], 0 offen offset:123
+; ALIGNED-NEXT:    buffer_load_ubyte v124, v1, s[0:3], 0 offen offset:122
+; ALIGNED-NEXT:    buffer_load_ubyte v123, v1, s[0:3], 0 offen offset:121
+; ALIGNED-NEXT:    buffer_load_ubyte v122, v1, s[0:3], 0 offen offset:120
+; ALIGNED-NEXT:    buffer_load_ubyte v121, v1, s[0:3], 0 offen offset:119
+; ALIGNED-NEXT:    buffer_load_ubyte v120, v1, s[0:3], 0 offen offset:118
+; ALIGNED-NEXT:    buffer_load_ubyte v111, v1, s[0:3], 0 offen offset:117
+; ALIGNED-NEXT:    buffer_load_ubyte v110, v1, s[0:3], 0 offen offset:116
+; ALIGNED-NEXT:    buffer_load_ubyte v109, v1, s[0:3], 0 offen offset:115
+; ALIGNED-NEXT:    buffer_load_ubyte v108, v1, s[0:3], 0 offen offset:114
+; ALIGNED-NEXT:    buffer_load_ubyte v107, v1, s[0:3], 0 offen offset:113
+; ALIGNED-NEXT:    buffer_load_ubyte v106, v1, s[0:3], 0 offen offset:112
+; ALIGNED-NEXT:    buffer_load_ubyte v105, v1, s[0:3], 0 offen offset:111
+; ALIGNED-NEXT:    buffer_load_ubyte v104, v1, s[0:3], 0 offen offset:110
+; ALIGNED-NEXT:    buffer_load_ubyte v95, v1, s[0:3], 0 offen offset:109
+; ALIGNED-NEXT:    buffer_load_ubyte v94, v1, s[0:3], 0 offen offset:108
+; ALIGNED-NEXT:    buffer_load_ubyte v93, v1, s[0:3], 0 offen offset:107
+; ALIGNED-NEXT:    buffer_load_ubyte v92, v1, s[0:3], 0 offen offset:106
+; ALIGNED-NEXT:    buffer_load_ubyte v91, v1, s[0:3], 0 offen offset:105
+; ALIGNED-NEXT:    buffer_load_ubyte v90, v1, s[0:3], 0 offen offset:104
+; ALIGNED-NEXT:    buffer_load_ubyte v89, v1, s[0:3], 0 offen offset:103
+; ALIGNED-NEXT:    buffer_load_ubyte v88, v1, s[0:3], 0 offen offset:102
+; ALIGNED-NEXT:    buffer_load_ubyte v79, v1, s[0:3], 0 offen offset:101
+; ALIGNED-NEXT:    buffer_load_ubyte v78, v1, s[0:3], 0 offen offset:100
+; ALIGNED-NEXT:    buffer_load_ubyte v77, v1, s[0:3], 0 offen offset:99
+; ALIGNED-NEXT:    buffer_load_ubyte v76, v1, s[0:3], 0 offen offset:98
+; ALIGNED-NEXT:    buffer_load_ubyte v75, v1, s[0:3], 0 offen offset:97
+; ALIGNED-NEXT:    buffer_load_ubyte v74, v1, s[0:3], 0 offen offset:96
+; ALIGNED-NEXT:    buffer_load_ubyte v73, v1, s[0:3], 0 offen offset:95
+; ALIGNED-NEXT:    buffer_load_ubyte v72, v1, s[0:3], 0 offen offset:94
+; ALIGNED-NEXT:    buffer_load_ubyte v63, v1, s[0:3], 0 offen offset:93
+; ALIGNED-NEXT:    buffer_load_ubyte v62, v1, s[0:3], 0 offen offset:92
+; ALIGNED-NEXT:    buffer_load_ubyte v61, v1, s[0:3], 0 offen offset:91
+; ALIGNED-NEXT:    buffer_load_ubyte v60, v1, s[0:3], 0 offen offset:90
+; ALIGNED-NEXT:    buffer_load_ubyte v59, v1, s[0:3], 0 offen offset:89
+; ALIGNED-NEXT:    buffer_load_ubyte v58, v1, s[0:3], 0 offen offset:88
+; ALIGNED-NEXT:    buffer_load_ubyte v57, v1, s[0:3], 0 offen offset:87
+; ALIGNED-NEXT:    buffer_load_ubyte v56, v1, s[0:3], 0 offen offset:86
+; ALIGNED-NEXT:    buffer_load_ubyte v47, v1, s[0:3], 0 offen offset:85
+; ALIGNED-NEXT:    buffer_load_ubyte v46, v1, s[0:3], 0 offen offset:84
+; ALIGNED-NEXT:    buffer_load_ubyte v45, v1, s[0:3], 0 offen offset:83
+; ALIGNED-NEXT:    buffer_load_ubyte v44, v1, s[0:3], 0 offen offset:82
+; ALIGNED-NEXT:    buffer_load_ubyte v43, v1, s[0:3], 0 offen offset:81
+; ALIGNED-NEXT:    buffer_load_ubyte v42, v1, s[0:3], 0 offen offset:80
+; ALIGNED-NEXT:    buffer_load_ubyte v41, v1, s[0:3], 0 offen offset:79
+; ALIGNED-NEXT:    buffer_load_ubyte v40, v1, s[0:3], 0 offen offset:78
+; ALIGNED-NEXT:    buffer_load_ubyte v119, v1, s[0:3], 0 offen offset:77
+; ALIGNED-NEXT:    buffer_load_ubyte v118, v1, s[0:3], 0 offen offset:76
+; ALIGNED-NEXT:    buffer_load_ubyte v117, v1, s[0:3], 0 offen offset:75
+; ALIGNED-NEXT:    buffer_load_ubyte v116, v1, s[0:3], 0 offen offset:74
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:73
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:72
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:480 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:71
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:70
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:69
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:468 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:67
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:66
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:65
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:64
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:63
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:62
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:61
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:60
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:59
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:58
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:57
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:56
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:55
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:54
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:53
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:52
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:51
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:50
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:49
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:48
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:47
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:46
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:45
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:44
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:43
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:42
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:41
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:40
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:39
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:38
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:37
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:36
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:35
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:34
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:33
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:32
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:31
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:30
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:29
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:27
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:26
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:25
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:24
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:23
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:22
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:21
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:20
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:19
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:18
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:17
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:16
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:15
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:14
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:13
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:12
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:11
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:10
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:9
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:8
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:7
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:6
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:5
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:4
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:3
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:2
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen offset:1
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v1, s[0:3], 0 offen
+; ALIGNED-NEXT:    v_add_nc_u32_e32 v1, 0xffffff00, v1
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:255
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:708 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:254
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:704 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:253
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:700 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:252
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:696 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:251
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:692 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:250
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:688 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:249
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:684 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:248
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:680 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:247
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:676 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:246
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:672 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:245
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:668 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:244
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:664 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:243
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:660 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:242
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:656 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:241
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:652 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:240
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:648 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:239
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:644 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:238
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:640 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:237
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:636 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:236
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:632 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:235
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:628 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:234
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:624 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:233
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:620 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:232
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:616 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:231
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:612 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:230
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:608 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:229
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:604 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:228
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:600 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:227
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:596 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:226
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:592 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:225
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:588 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:224
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:584 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:223
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:580 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:222
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:576 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:221
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:572 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:220
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:568 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:219
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:564 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:218
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:560 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:217
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:556 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:216
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:552 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:215
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:548 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:214
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:544 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:213
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:540 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:212
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:536 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:211
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:532 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:210
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:528 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:209
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:524 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:208
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:520 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:207
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:516 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:206
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:512 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:205
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:508 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:204
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:504 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:203
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:500 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:202
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:496 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:201
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:492 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:200
+; ALIGNED-NEXT:    buffer_store_byte v115, v0, s[0:3], 0 offen offset:199
+; ALIGNED-NEXT:    buffer_store_byte v114, v0, s[0:3], 0 offen offset:198
+; ALIGNED-NEXT:    buffer_store_byte v113, v0, s[0:3], 0 offen offset:197
+; ALIGNED-NEXT:    buffer_store_byte v112, v0, s[0:3], 0 offen offset:196
+; ALIGNED-NEXT:    buffer_store_byte v103, v0, s[0:3], 0 offen offset:195
+; ALIGNED-NEXT:    buffer_store_byte v102, v0, s[0:3], 0 offen offset:194
+; ALIGNED-NEXT:    buffer_store_byte v101, v0, s[0:3], 0 offen offset:193
+; ALIGNED-NEXT:    buffer_store_byte v100, v0, s[0:3], 0 offen offset:192
+; ALIGNED-NEXT:    buffer_store_byte v99, v0, s[0:3], 0 offen offset:191
+; ALIGNED-NEXT:    buffer_store_byte v98, v0, s[0:3], 0 offen offset:190
+; ALIGNED-NEXT:    buffer_store_byte v97, v0, s[0:3], 0 offen offset:189
+; ALIGNED-NEXT:    buffer_store_byte v96, v0, s[0:3], 0 offen offset:188
+; ALIGNED-NEXT:    buffer_store_byte v87, v0, s[0:3], 0 offen offset:187
+; ALIGNED-NEXT:    buffer_store_byte v86, v0, s[0:3], 0 offen offset:186
+; ALIGNED-NEXT:    buffer_store_byte v85, v0, s[0:3], 0 offen offset:185
+; ALIGNED-NEXT:    buffer_store_byte v84, v0, s[0:3], 0 offen offset:184
+; ALIGNED-NEXT:    buffer_store_byte v83, v0, s[0:3], 0 offen offset:183
+; ALIGNED-NEXT:    buffer_store_byte v82, v0, s[0:3], 0 offen offset:182
+; ALIGNED-NEXT:    buffer_store_byte v81, v0, s[0:3], 0 offen offset:181
+; ALIGNED-NEXT:    buffer_store_byte v80, v0, s[0:3], 0 offen offset:180
+; ALIGNED-NEXT:    buffer_store_byte v71, v0, s[0:3], 0 offen offset:179
+; ALIGNED-NEXT:    buffer_store_byte v70, v0, s[0:3], 0 offen offset:178
+; ALIGNED-NEXT:    buffer_store_byte v69, v0, s[0:3], 0 offen offset:177
+; ALIGNED-NEXT:    buffer_store_byte v68, v0, s[0:3], 0 offen offset:176
+; ALIGNED-NEXT:    buffer_store_byte v67, v0, s[0:3], 0 offen offset:175
+; ALIGNED-NEXT:    buffer_store_byte v66, v0, s[0:3], 0 offen offset:174
+; ALIGNED-NEXT:    buffer_store_byte v65, v0, s[0:3], 0 offen offset:173
+; ALIGNED-NEXT:    buffer_store_byte v64, v0, s[0:3], 0 offen offset:172
+; ALIGNED-NEXT:    buffer_store_byte v55, v0, s[0:3], 0 offen offset:171
+; ALIGNED-NEXT:    buffer_store_byte v54, v0, s[0:3], 0 offen offset:170
+; ALIGNED-NEXT:    buffer_store_byte v53, v0, s[0:3], 0 offen offset:169
+; ALIGNED-NEXT:    buffer_store_byte v52, v0, s[0:3], 0 offen offset:168
+; ALIGNED-NEXT:    buffer_store_byte v51, v0, s[0:3], 0 offen offset:167
+; ALIGNED-NEXT:    buffer_store_byte v50, v0, s[0:3], 0 offen offset:166
+; ALIGNED-NEXT:    buffer_store_byte v49, v0, s[0:3], 0 offen offset:165
+; ALIGNED-NEXT:    buffer_store_byte v48, v0, s[0:3], 0 offen offset:164
+; ALIGNED-NEXT:    buffer_store_byte v39, v0, s[0:3], 0 offen offset:163
+; ALIGNED-NEXT:    buffer_store_byte v38, v0, s[0:3], 0 offen offset:162
+; ALIGNED-NEXT:    buffer_store_byte v37, v0, s[0:3], 0 offen offset:161
+; ALIGNED-NEXT:    buffer_store_byte v36, v0, s[0:3], 0 offen offset:160
+; ALIGNED-NEXT:    buffer_store_byte v35, v0, s[0:3], 0 offen offset:159
+; ALIGNED-NEXT:    buffer_store_byte v34, v0, s[0:3], 0 offen offset:158
+; ALIGNED-NEXT:    buffer_store_byte v33, v0, s[0:3], 0 offen offset:157
+; ALIGNED-NEXT:    buffer_store_byte v32, v0, s[0:3], 0 offen offset:156
+; ALIGNED-NEXT:    buffer_store_byte v31, v0, s[0:3], 0 offen offset:155
+; ALIGNED-NEXT:    buffer_store_byte v30, v0, s[0:3], 0 offen offset:154
+; ALIGNED-NEXT:    buffer_store_byte v29, v0, s[0:3], 0 offen offset:153
+; ALIGNED-NEXT:    buffer_store_byte v28, v0, s[0:3], 0 offen offset:152
+; ALIGNED-NEXT:    buffer_store_byte v27, v0, s[0:3], 0 offen offset:151
+; ALIGNED-NEXT:    buffer_store_byte v26, v0, s[0:3], 0 offen offset:150
+; ALIGNED-NEXT:    buffer_store_byte v25, v0, s[0:3], 0 offen offset:149
+; ALIGNED-NEXT:    buffer_store_byte v24, v0, s[0:3], 0 offen offset:148
+; ALIGNED-NEXT:    buffer_store_byte v23, v0, s[0:3], 0 offen offset:147
+; ALIGNED-NEXT:    buffer_store_byte v22, v0, s[0:3], 0 offen offset:146
+; ALIGNED-NEXT:    buffer_store_byte v21, v0, s[0:3], 0 offen offset:145
+; ALIGNED-NEXT:    buffer_store_byte v20, v0, s[0:3], 0 offen offset:144
+; ALIGNED-NEXT:    buffer_store_byte v19, v0, s[0:3], 0 offen offset:143
+; ALIGNED-NEXT:    buffer_store_byte v18, v0, s[0:3], 0 offen offset:142
+; ALIGNED-NEXT:    buffer_store_byte v17, v0, s[0:3], 0 offen offset:141
+; ALIGNED-NEXT:    buffer_store_byte v16, v0, s[0:3], 0 offen offset:140
+; ALIGNED-NEXT:    buffer_store_byte v15, v0, s[0:3], 0 offen offset:139
+; ALIGNED-NEXT:    buffer_store_byte v14, v0, s[0:3], 0 offen offset:138
+; ALIGNED-NEXT:    buffer_store_byte v13, v0, s[0:3], 0 offen offset:137
+; ALIGNED-NEXT:    buffer_store_byte v12, v0, s[0:3], 0 offen offset:136
+; ALIGNED-NEXT:    buffer_store_byte v11, v0, s[0:3], 0 offen offset:135
+; ALIGNED-NEXT:    buffer_store_byte v10, v0, s[0:3], 0 offen offset:134
+; ALIGNED-NEXT:    buffer_store_byte v9, v0, s[0:3], 0 offen offset:133
+; ALIGNED-NEXT:    buffer_store_byte v8, v0, s[0:3], 0 offen offset:132
+; ALIGNED-NEXT:    buffer_store_byte v7, v0, s[0:3], 0 offen offset:131
+; ALIGNED-NEXT:    buffer_store_byte v6, v0, s[0:3], 0 offen offset:130
+; ALIGNED-NEXT:    buffer_store_byte v5, v0, s[0:3], 0 offen offset:129
+; ALIGNED-NEXT:    buffer_store_byte v4, v0, s[0:3], 0 offen offset:128
+; ALIGNED-NEXT:    buffer_store_byte v3, v0, s[0:3], 0 offen offset:127
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:488 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:126
+; ALIGNED-NEXT:    buffer_store_byte v127, v0, s[0:3], 0 offen offset:125
+; ALIGNED-NEXT:    buffer_store_byte v126, v0, s[0:3], 0 offen offset:124
+; ALIGNED-NEXT:    buffer_store_byte v125, v0, s[0:3], 0 offen offset:123
+; ALIGNED-NEXT:    buffer_store_byte v124, v0, s[0:3], 0 offen offset:122
+; ALIGNED-NEXT:    buffer_store_byte v123, v0, s[0:3], 0 offen offset:121
+; ALIGNED-NEXT:    buffer_store_byte v122, v0, s[0:3], 0 offen offset:120
+; ALIGNED-NEXT:    buffer_store_byte v121, v0, s[0:3], 0 offen offset:119
+; ALIGNED-NEXT:    buffer_store_byte v120, v0, s[0:3], 0 offen offset:118
+; ALIGNED-NEXT:    buffer_store_byte v111, v0, s[0:3], 0 offen offset:117
+; ALIGNED-NEXT:    buffer_store_byte v110, v0, s[0:3], 0 offen offset:116
+; ALIGNED-NEXT:    buffer_store_byte v109, v0, s[0:3], 0 offen offset:115
+; ALIGNED-NEXT:    buffer_store_byte v108, v0, s[0:3], 0 offen offset:114
+; ALIGNED-NEXT:    buffer_store_byte v107, v0, s[0:3], 0 offen offset:113
+; ALIGNED-NEXT:    buffer_store_byte v106, v0, s[0:3], 0 offen offset:112
+; ALIGNED-NEXT:    buffer_store_byte v105, v0, s[0:3], 0 offen offset:111
+; ALIGNED-NEXT:    buffer_store_byte v104, v0, s[0:3], 0 offen offset:110
+; ALIGNED-NEXT:    buffer_store_byte v95, v0, s[0:3], 0 offen offset:109
+; ALIGNED-NEXT:    buffer_store_byte v94, v0, s[0:3], 0 offen offset:108
+; ALIGNED-NEXT:    buffer_store_byte v93, v0, s[0:3], 0 offen offset:107
+; ALIGNED-NEXT:    buffer_store_byte v92, v0, s[0:3], 0 offen offset:106
+; ALIGNED-NEXT:    buffer_store_byte v91, v0, s[0:3], 0 offen offset:105
+; ALIGNED-NEXT:    buffer_store_byte v90, v0, s[0:3], 0 offen offset:104
+; ALIGNED-NEXT:    buffer_store_byte v89, v0, s[0:3], 0 offen offset:103
+; ALIGNED-NEXT:    buffer_store_byte v88, v0, s[0:3], 0 offen offset:102
+; ALIGNED-NEXT:    buffer_store_byte v79, v0, s[0:3], 0 offen offset:101
+; ALIGNED-NEXT:    buffer_store_byte v78, v0, s[0:3], 0 offen offset:100
+; ALIGNED-NEXT:    buffer_store_byte v77, v0, s[0:3], 0 offen offset:99
+; ALIGNED-NEXT:    buffer_store_byte v76, v0, s[0:3], 0 offen offset:98
+; ALIGNED-NEXT:    buffer_store_byte v75, v0, s[0:3], 0 offen offset:97
+; ALIGNED-NEXT:    buffer_store_byte v74, v0, s[0:3], 0 offen offset:96
+; ALIGNED-NEXT:    buffer_store_byte v73, v0, s[0:3], 0 offen offset:95
+; ALIGNED-NEXT:    buffer_store_byte v72, v0, s[0:3], 0 offen offset:94
+; ALIGNED-NEXT:    buffer_store_byte v63, v0, s[0:3], 0 offen offset:93
+; ALIGNED-NEXT:    buffer_store_byte v62, v0, s[0:3], 0 offen offset:92
+; ALIGNED-NEXT:    buffer_store_byte v61, v0, s[0:3], 0 offen offset:91
+; ALIGNED-NEXT:    buffer_store_byte v60, v0, s[0:3], 0 offen offset:90
+; ALIGNED-NEXT:    buffer_store_byte v59, v0, s[0:3], 0 offen offset:89
+; ALIGNED-NEXT:    buffer_store_byte v58, v0, s[0:3], 0 offen offset:88
+; ALIGNED-NEXT:    buffer_store_byte v57, v0, s[0:3], 0 offen offset:87
+; ALIGNED-NEXT:    buffer_store_byte v56, v0, s[0:3], 0 offen offset:86
+; ALIGNED-NEXT:    buffer_store_byte v47, v0, s[0:3], 0 offen offset:85
+; ALIGNED-NEXT:    buffer_store_byte v46, v0, s[0:3], 0 offen offset:84
+; ALIGNED-NEXT:    buffer_store_byte v45, v0, s[0:3], 0 offen offset:83
+; ALIGNED-NEXT:    buffer_store_byte v44, v0, s[0:3], 0 offen offset:82
+; ALIGNED-NEXT:    buffer_store_byte v43, v0, s[0:3], 0 offen offset:81
+; ALIGNED-NEXT:    buffer_store_byte v42, v0, s[0:3], 0 offen offset:80
+; ALIGNED-NEXT:    buffer_store_byte v41, v0, s[0:3], 0 offen offset:79
+; ALIGNED-NEXT:    buffer_store_byte v40, v0, s[0:3], 0 offen offset:78
+; ALIGNED-NEXT:    buffer_store_byte v119, v0, s[0:3], 0 offen offset:77
+; ALIGNED-NEXT:    buffer_store_byte v118, v0, s[0:3], 0 offen offset:76
+; ALIGNED-NEXT:    buffer_store_byte v117, v0, s[0:3], 0 offen offset:75
+; ALIGNED-NEXT:    buffer_store_byte v116, v0, s[0:3], 0 offen offset:74
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:484 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:73
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:480 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:72
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:476 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:71
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:472 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:70
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:468 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:69
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:68
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:67
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:66
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:65
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:64
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:63
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:62
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:61
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:60
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:59
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:58
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:57
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:56
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:55
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:54
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:53
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:52
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:51
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:50
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:49
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:48
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:47
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:46
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:45
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:44
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:43
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:42
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:41
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:40
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:39
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:38
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:37
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:36
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:35
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:34
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:33
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:32
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:31
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:30
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:29
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:28
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:27
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:26
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:25
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:24
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:23
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:22
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:21
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:20
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:19
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:18
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:17
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:16
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:15
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:14
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:13
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:12
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:11
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:10
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:9
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:8
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:7
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:6
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:5
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:4
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:3
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:2
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen offset:1
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_byte v2, v0, s[0:3], 0 offen
+; ALIGNED-NEXT:    v_add_nc_u32_e32 v0, 0xffffff00, v0
+; ALIGNED-NEXT:    s_cbranch_scc0 .LBB8_5
+; ALIGNED-NEXT:  .LBB8_6: ; %Flow19
+; ALIGNED-NEXT:    s_or_b32 exec_lo, exec_lo, s6
+; ALIGNED-NEXT:    s_clause 0x2f
+; ALIGNED-NEXT:    buffer_load_dword v127, off, s[0:3], s32
+; ALIGNED-NEXT:    buffer_load_dword v126, off, s[0:3], s32 offset:4
+; ALIGNED-NEXT:    buffer_load_dword v125, off, s[0:3], s32 offset:8
+; ALIGNED-NEXT:    buffer_load_dword v124, off, s[0:3], s32 offset:12
+; ALIGNED-NEXT:    buffer_load_dword v123, off, s[0:3], s32 offset:16
+; ALIGNED-NEXT:    buffer_load_dword v122, off, s[0:3], s32 offset:20
+; ALIGNED-NEXT:    buffer_load_dword v121, off, s[0:3], s32 offset:24
+; ALIGNED-NEXT:    buffer_load_dword v120, off, s[0:3], s32 offset:28
+; ALIGNED-NEXT:    buffer_load_dword v111, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    buffer_load_dword v110, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_load_dword v109, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_load_dword v108, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    buffer_load_dword v107, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    buffer_load_dword v106, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_load_dword v105, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_load_dword v104, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    buffer_load_dword v95, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    s_setpc_b64 s[30:31]
+;
+; UNROLL3-LABEL: memmove_p5_p5_sz2048:
+; UNROLL3:       ; %bb.0: ; %entry
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    s_mov_b32 s4, exec_lo
+; UNROLL3-NEXT:    v_cmpx_ge_u32_e64 v1, v0
+; UNROLL3-NEXT:    s_xor_b32 s6, exec_lo, s4
+; UNROLL3-NEXT:    s_cbranch_execz .LBB8_4
+; UNROLL3-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; UNROLL3-NEXT:    v_mov_b32_e32 v2, v1
+; UNROLL3-NEXT:    v_mov_b32_e32 v3, v0
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0x7e0
+; UNROLL3-NEXT:  .LBB8_2: ; %memmove_fwd_loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    s_clause 0xb
+; UNROLL3-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:44
+; UNROLL3-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:40
+; UNROLL3-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:36
+; UNROLL3-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:32
+; UNROLL3-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:28
+; UNROLL3-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
+; UNROLL3-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:20
+; UNROLL3-NEXT:    buffer_load_dword v11, v2, s[0:3], 0 offen offset:16
+; UNROLL3-NEXT:    buffer_load_dword v12, v2, s[0:3], 0 offen offset:12
+; UNROLL3-NEXT:    buffer_load_dword v13, v2, s[0:3], 0 offen offset:8
+; UNROLL3-NEXT:    buffer_load_dword v14, v2, s[0:3], 0 offen offset:4
+; UNROLL3-NEXT:    buffer_load_dword v15, v2, s[0:3], 0 offen
+; UNROLL3-NEXT:    v_add_nc_u32_e32 v2, 48, v2
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 0xffffffd0
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, -1
+; UNROLL3-NEXT:    s_waitcnt vmcnt(11)
+; UNROLL3-NEXT:    buffer_store_dword v4, v3, s[0:3], 0 offen offset:44
+; UNROLL3-NEXT:    s_waitcnt vmcnt(10)
+; UNROLL3-NEXT:    buffer_store_dword v5, v3, s[0:3], 0 offen offset:40
+; UNROLL3-NEXT:    s_waitcnt vmcnt(9)
+; UNROLL3-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen offset:36
+; UNROLL3-NEXT:    s_waitcnt vmcnt(8)
+; UNROLL3-NEXT:    buffer_store_dword v7, v3, s[0:3], 0 offen offset:32
+; UNROLL3-NEXT:    s_waitcnt vmcnt(7)
+; UNROLL3-NEXT:    buffer_store_dword v8, v3, s[0:3], 0 offen offset:28
+; UNROLL3-NEXT:    s_waitcnt vmcnt(6)
+; UNROLL3-NEXT:    buffer_store_dword v9, v3, s[0:3], 0 offen offset:24
+; UNROLL3-NEXT:    s_waitcnt vmcnt(5)
+; UNROLL3-NEXT:    buffer_store_dword v10, v3, s[0:3], 0 offen offset:20
+; UNROLL3-NEXT:    s_waitcnt vmcnt(4)
+; UNROLL3-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen offset:16
+; UNROLL3-NEXT:    s_waitcnt vmcnt(3)
+; UNROLL3-NEXT:    buffer_store_dword v12, v3, s[0:3], 0 offen offset:12
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    buffer_store_dword v13, v3, s[0:3], 0 offen offset:8
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    buffer_store_dword v14, v3, s[0:3], 0 offen offset:4
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    buffer_store_dword v15, v3, s[0:3], 0 offen
+; UNROLL3-NEXT:    v_add_nc_u32_e32 v3, 48, v3
+; UNROLL3-NEXT:    s_cmp_lg_u64 s[4:5], 0
+; UNROLL3-NEXT:    s_cbranch_scc1 .LBB8_2
+; UNROLL3-NEXT:  ; %bb.3: ; %memmove_fwd_residual
+; UNROLL3-NEXT:    s_clause 0x3
+; UNROLL3-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:2028
+; UNROLL3-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:2024
+; UNROLL3-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:2020
+; UNROLL3-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:2016
+; UNROLL3-NEXT:    s_waitcnt vmcnt(3)
+; UNROLL3-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:2028
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:2024
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:2020
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:2016
+; UNROLL3-NEXT:    s_clause 0x3
+; UNROLL3-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:2044
+; UNROLL3-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:2040
+; UNROLL3-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:2036
+; UNROLL3-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen offset:2032
+; UNROLL3-NEXT:    s_waitcnt vmcnt(3)
+; UNROLL3-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:2044
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:2040
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:2036
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:2032
+; UNROLL3-NEXT:    ; implicit-def: $vgpr1
+; UNROLL3-NEXT:    ; implicit-def: $vgpr0
+; UNROLL3-NEXT:  .LBB8_4: ; %Flow16
+; UNROLL3-NEXT:    s_andn2_saveexec_b32 s6, s6
+; UNROLL3-NEXT:    s_cbranch_execz .LBB8_7
+; UNROLL3-NEXT:  ; %bb.5: ; %memmove_bwd_residual
+; UNROLL3-NEXT:    s_clause 0x3
+; UNROLL3-NEXT:    buffer_load_dword v2, v1, s[0:3], 0 offen offset:2044
+; UNROLL3-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:2040
+; UNROLL3-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:2036
+; UNROLL3-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:2032
+; UNROLL3-NEXT:    s_movk_i32 s4, 0xf820
+; UNROLL3-NEXT:    s_mov_b32 s5, -1
+; UNROLL3-NEXT:    s_waitcnt vmcnt(3)
+; UNROLL3-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:2044
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:2040
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:2036
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:2032
+; UNROLL3-NEXT:    s_clause 0x3
+; UNROLL3-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:2028
+; UNROLL3-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:2024
+; UNROLL3-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:2020
+; UNROLL3-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:2016
+; UNROLL3-NEXT:    v_add_nc_u32_e32 v2, 0x7b0, v0
+; UNROLL3-NEXT:    v_add_nc_u32_e32 v1, 0x7b0, v1
+; UNROLL3-NEXT:    s_waitcnt vmcnt(3)
+; UNROLL3-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:2028
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:2024
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:2020
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:2016
+; UNROLL3-NEXT:  .LBB8_6: ; %memmove_bwd_loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    s_clause 0xb
+; UNROLL3-NEXT:    buffer_load_dword v0, v1, s[0:3], 0 offen offset:44
+; UNROLL3-NEXT:    buffer_load_dword v3, v1, s[0:3], 0 offen offset:40
+; UNROLL3-NEXT:    buffer_load_dword v4, v1, s[0:3], 0 offen offset:36
+; UNROLL3-NEXT:    buffer_load_dword v5, v1, s[0:3], 0 offen offset:32
+; UNROLL3-NEXT:    buffer_load_dword v6, v1, s[0:3], 0 offen offset:28
+; UNROLL3-NEXT:    buffer_load_dword v7, v1, s[0:3], 0 offen offset:24
+; UNROLL3-NEXT:    buffer_load_dword v8, v1, s[0:3], 0 offen offset:20
+; UNROLL3-NEXT:    buffer_load_dword v9, v1, s[0:3], 0 offen offset:16
+; UNROLL3-NEXT:    buffer_load_dword v10, v1, s[0:3], 0 offen offset:12
+; UNROLL3-NEXT:    buffer_load_dword v11, v1, s[0:3], 0 offen offset:8
+; UNROLL3-NEXT:    buffer_load_dword v12, v1, s[0:3], 0 offen offset:4
+; UNROLL3-NEXT:    buffer_load_dword v13, v1, s[0:3], 0 offen
+; UNROLL3-NEXT:    v_subrev_nc_u32_e32 v1, 48, v1
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 48
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, 0
+; UNROLL3-NEXT:    s_waitcnt vmcnt(11)
+; UNROLL3-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen offset:44
+; UNROLL3-NEXT:    s_waitcnt vmcnt(10)
+; UNROLL3-NEXT:    buffer_store_dword v3, v2, s[0:3], 0 offen offset:40
+; UNROLL3-NEXT:    s_waitcnt vmcnt(9)
+; UNROLL3-NEXT:    buffer_store_dword v4, v2, s[0:3], 0 offen offset:36
+; UNROLL3-NEXT:    s_waitcnt vmcnt(8)
+; UNROLL3-NEXT:    buffer_store_dword v5, v2, s[0:3], 0 offen offset:32
+; UNROLL3-NEXT:    s_waitcnt vmcnt(7)
+; UNROLL3-NEXT:    buffer_store_dword v6, v2, s[0:3], 0 offen offset:28
+; UNROLL3-NEXT:    s_waitcnt vmcnt(6)
+; UNROLL3-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen offset:24
+; UNROLL3-NEXT:    s_waitcnt vmcnt(5)
+; UNROLL3-NEXT:    buffer_store_dword v8, v2, s[0:3], 0 offen offset:20
+; UNROLL3-NEXT:    s_waitcnt vmcnt(4)
+; UNROLL3-NEXT:    buffer_store_dword v9, v2, s[0:3], 0 offen offset:16
+; UNROLL3-NEXT:    s_waitcnt vmcnt(3)
+; UNROLL3-NEXT:    buffer_store_dword v10, v2, s[0:3], 0 offen offset:12
+; UNROLL3-NEXT:    s_waitcnt vmcnt(2)
+; UNROLL3-NEXT:    buffer_store_dword v11, v2, s[0:3], 0 offen offset:8
+; UNROLL3-NEXT:    s_waitcnt vmcnt(1)
+; UNROLL3-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen offset:4
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
+; UNROLL3-NEXT:    v_subrev_nc_u32_e32 v2, 48, v2
+; UNROLL3-NEXT:    s_cmp_eq_u64 s[4:5], 0
+; UNROLL3-NEXT:    s_cbranch_scc0 .LBB8_6
+; UNROLL3-NEXT:  .LBB8_7: ; %Flow17
+; UNROLL3-NEXT:    s_or_b32 exec_lo, exec_lo, s6
+; UNROLL3-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 2048, i1 false)
+  ret void
+}
+
+define void @memmove_p0_p5_sz2048(ptr addrspace(0) align 1 %dst, ptr addrspace(5) align 1 readonly %src) {
+; CHECK-LABEL: memmove_p0_p5_sz2048:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1]
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0
+; CHECK-NEXT:    s_mov_b32 s6, exec_lo
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, -1, v0, vcc_lo
+; CHECK-NEXT:    v_cmpx_ge_u32_e64 v2, v3
+; CHECK-NEXT:    s_xor_b32 s6, exec_lo, s6
+; CHECK-NEXT:    s_cbranch_execz .LBB9_2
+; CHECK-NEXT:  .LBB9_1: ; %memmove_fwd_loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_clause 0x3e
+; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
+; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
+; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
+; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
+; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:32
+; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:36
+; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:40
+; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:44
+; CHECK-NEXT:    buffer_load_dword v11, v2, s[0:3], 0 offen offset:48
+; CHECK-NEXT:    buffer_load_dword v12, v2, s[0:3], 0 offen offset:52
+; CHECK-NEXT:    buffer_load_dword v13, v2, s[0:3], 0 offen offset:56
+; CHECK-NEXT:    buffer_load_dword v14, v2, s[0:3], 0 offen offset:60
+; CHECK-NEXT:    buffer_load_dword v18, v2, s[0:3], 0 offen offset:92
+; CHECK-NEXT:    buffer_load_dword v17, v2, s[0:3], 0 offen offset:88
+; CHECK-NEXT:    buffer_load_dword v16, v2, s[0:3], 0 offen offset:84
+; CHECK-NEXT:    buffer_load_dword v15, v2, s[0:3], 0 offen offset:80
+; CHECK-NEXT:    buffer_load_dword v22, v2, s[0:3], 0 offen offset:124
+; CHECK-NEXT:    buffer_load_dword v21, v2, s[0:3], 0 offen offset:120
+; CHECK-NEXT:    buffer_load_dword v20, v2, s[0:3], 0 offen offset:116
+; CHECK-NEXT:    buffer_load_dword v19, v2, s[0:3], 0 offen offset:112
+; CHECK-NEXT:    buffer_load_dword v26, v2, s[0:3], 0 offen offset:108
+; CHECK-NEXT:    buffer_load_dword v25, v2, s[0:3], 0 offen offset:104
+; CHECK-NEXT:    buffer_load_dword v24, v2, s[0:3], 0 offen offset:100
+; CHECK-NEXT:    buffer_load_dword v23, v2, s[0:3], 0 offen offset:96
+; CHECK-NEXT:    buffer_load_dword v30, v2, s[0:3], 0 offen offset:156
+; CHECK-NEXT:    buffer_load_dword v29, v2, s[0:3], 0 offen offset:152
+; CHECK-NEXT:    buffer_load_dword v28, v2, s[0:3], 0 offen offset:148
+; CHECK-NEXT:    buffer_load_dword v27, v2, s[0:3], 0 offen offset:144
+; CHECK-NEXT:    buffer_load_dword v34, v2, s[0:3], 0 offen offset:188
+; CHECK-NEXT:    buffer_load_dword v33, v2, s[0:3], 0 offen offset:184
+; CHECK-NEXT:    buffer_load_dword v32, v2, s[0:3], 0 offen offset:180
+; CHECK-NEXT:    buffer_load_dword v31, v2, s[0:3], 0 offen offset:176
+; CHECK-NEXT:    buffer_load_dword v38, v2, s[0:3], 0 offen offset:172
+; CHECK-NEXT:    buffer_load_dword v37, v2, s[0:3], 0 offen offset:168
+; CHECK-NEXT:    buffer_load_dword v36, v2, s[0:3], 0 offen offset:164
+; CHECK-NEXT:    buffer_load_dword v35, v2, s[0:3], 0 offen offset:160
+; CHECK-NEXT:    buffer_load_dword v51, v2, s[0:3], 0 offen offset:220
+; CHECK-NEXT:    buffer_load_dword v50, v2, s[0:3], 0 offen offset:216
+; CHECK-NEXT:    buffer_load_dword v49, v2, s[0:3], 0 offen offset:212
+; CHECK-NEXT:    buffer_load_dword v48, v2, s[0:3], 0 offen offset:208
+; CHECK-NEXT:    buffer_load_dword v55, v2, s[0:3], 0 offen offset:252
+; CHECK-NEXT:    buffer_load_dword v54, v2, s[0:3], 0 offen offset:248
+; CHECK-NEXT:    buffer_load_dword v53, v2, s[0:3], 0 offen offset:244
+; CHECK-NEXT:    buffer_load_dword v52, v2, s[0:3], 0 offen offset:240
+; CHECK-NEXT:    buffer_load_dword v67, v2, s[0:3], 0 offen offset:236
+; CHECK-NEXT:    buffer_load_dword v66, v2, s[0:3], 0 offen offset:232
+; CHECK-NEXT:    buffer_load_dword v65, v2, s[0:3], 0 offen offset:228
+; CHECK-NEXT:    buffer_load_dword v64, v2, s[0:3], 0 offen offset:224
+; CHECK-NEXT:    buffer_load_dword v71, v2, s[0:3], 0 offen offset:204
+; CHECK-NEXT:    buffer_load_dword v70, v2, s[0:3], 0 offen offset:200
+; CHECK-NEXT:    buffer_load_dword v69, v2, s[0:3], 0 offen offset:196
+; CHECK-NEXT:    buffer_load_dword v68, v2, s[0:3], 0 offen offset:192
+; CHECK-NEXT:    buffer_load_dword v83, v2, s[0:3], 0 offen offset:140
+; CHECK-NEXT:    buffer_load_dword v82, v2, s[0:3], 0 offen offset:136
+; CHECK-NEXT:    buffer_load_dword v81, v2, s[0:3], 0 offen offset:132
+; CHECK-NEXT:    buffer_load_dword v80, v2, s[0:3], 0 offen offset:128
+; CHECK-NEXT:    buffer_load_dword v87, v2, s[0:3], 0 offen offset:76
+; CHECK-NEXT:    buffer_load_dword v86, v2, s[0:3], 0 offen offset:72
+; CHECK-NEXT:    buffer_load_dword v85, v2, s[0:3], 0 offen offset:68
+; CHECK-NEXT:    buffer_load_dword v84, v2, s[0:3], 0 offen offset:64
+; CHECK-NEXT:    buffer_load_dword v96, v2, s[0:3], 0 offen
+; CHECK-NEXT:    buffer_load_dword v97, v2, s[0:3], 0 offen offset:4
+; CHECK-NEXT:    buffer_load_dword v98, v2, s[0:3], 0 offen offset:8
+; CHECK-NEXT:    buffer_load_dword v99, v2, s[0:3], 0 offen offset:12
+; CHECK-NEXT:    v_add_co_u32 v100, vcc_lo, v0, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo
+; CHECK-NEXT:    s_add_u32 s4, s4, 0x100
+; CHECK-NEXT:    v_add_nc_u32_e32 v2, 0x100, v2
+; CHECK-NEXT:    s_addc_u32 s5, s5, 0
+; CHECK-NEXT:    s_waitcnt vmcnt(20)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[52:55] offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(16)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[64:67] offset:224
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[48:51] offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(12)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[68:71] offset:192
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[31:34] offset:176
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[35:38] offset:160
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[27:30] offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(8)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[80:83] offset:128
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[19:22] offset:112
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[23:26] offset:96
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[15:18] offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(4)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[84:87] offset:64
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[11:14] offset:48
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[7:10] offset:32
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[3:6] offset:16
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[96:99]
+; CHECK-NEXT:    s_cmp_lg_u64 s[4:5], 0x800
+; CHECK-NEXT:    s_cbranch_scc1 .LBB9_1
+; CHECK-NEXT:  .LBB9_2: ; %Flow10
+; CHECK-NEXT:    s_andn2_saveexec_b32 s8, s6
+; CHECK-NEXT:    s_cbranch_execz .LBB9_5
+; CHECK-NEXT:  ; %bb.3: ; %memmove_bwd_loop.preheader
+; CHECK-NEXT:    v_add_nc_u32_e32 v2, 0x700, v2
+; CHECK-NEXT:    s_movk_i32 s6, 0xff00
+; CHECK-NEXT:    s_mov_b64 s[4:5], 0x700
+; CHECK-NEXT:    s_mov_b32 s7, -1
+; CHECK-NEXT:  .LBB9_4: ; %memmove_bwd_loop
+; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    s_clause 0x3e
+; CHECK-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:20
+; CHECK-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:24
+; CHECK-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:28
+; CHECK-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:32
+; CHECK-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:36
+; CHECK-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:40
+; CHECK-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:44
+; CHECK-NEXT:    buffer_load_dword v11, v2, s[0:3], 0 offen offset:48
+; CHECK-NEXT:    buffer_load_dword v12, v2, s[0:3], 0 offen offset:52
+; CHECK-NEXT:    buffer_load_dword v13, v2, s[0:3], 0 offen offset:56
+; CHECK-NEXT:    buffer_load_dword v14, v2, s[0:3], 0 offen offset:60
+; CHECK-NEXT:    buffer_load_dword v18, v2, s[0:3], 0 offen offset:124
+; CHECK-NEXT:    buffer_load_dword v17, v2, s[0:3], 0 offen offset:120
+; CHECK-NEXT:    buffer_load_dword v16, v2, s[0:3], 0 offen offset:116
+; CHECK-NEXT:    buffer_load_dword v15, v2, s[0:3], 0 offen offset:112
+; CHECK-NEXT:    buffer_load_dword v22, v2, s[0:3], 0 offen offset:108
+; CHECK-NEXT:    buffer_load_dword v21, v2, s[0:3], 0 offen offset:104
+; CHECK-NEXT:    buffer_load_dword v20, v2, s[0:3], 0 offen offset:100
+; CHECK-NEXT:    buffer_load_dword v19, v2, s[0:3], 0 offen offset:96
+; CHECK-NEXT:    buffer_load_dword v26, v2, s[0:3], 0 offen offset:252
+; CHECK-NEXT:    buffer_load_dword v25, v2, s[0:3], 0 offen offset:248
+; CHECK-NEXT:    buffer_load_dword v24, v2, s[0:3], 0 offen offset:244
+; CHECK-NEXT:    buffer_load_dword v23, v2, s[0:3], 0 offen offset:240
+; CHECK-NEXT:    buffer_load_dword v30, v2, s[0:3], 0 offen offset:236
+; CHECK-NEXT:    buffer_load_dword v29, v2, s[0:3], 0 offen offset:232
+; CHECK-NEXT:    buffer_load_dword v28, v2, s[0:3], 0 offen offset:228
+; CHECK-NEXT:    buffer_load_dword v27, v2, s[0:3], 0 offen offset:224
+; CHECK-NEXT:    buffer_load_dword v34, v2, s[0:3], 0 offen offset:220
+; CHECK-NEXT:    buffer_load_dword v33, v2, s[0:3], 0 offen offset:216
+; CHECK-NEXT:    buffer_load_dword v32, v2, s[0:3], 0 offen offset:212
+; CHECK-NEXT:    buffer_load_dword v31, v2, s[0:3], 0 offen offset:208
+; CHECK-NEXT:    buffer_load_dword v38, v2, s[0:3], 0 offen offset:204
+; CHECK-NEXT:    buffer_load_dword v37, v2, s[0:3], 0 offen offset:200
+; CHECK-NEXT:    buffer_load_dword v36, v2, s[0:3], 0 offen offset:196
+; CHECK-NEXT:    buffer_load_dword v35, v2, s[0:3], 0 offen offset:192
+; CHECK-NEXT:    buffer_load_dword v51, v2, s[0:3], 0 offen offset:188
+; CHECK-NEXT:    buffer_load_dword v50, v2, s[0:3], 0 offen offset:184
+; CHECK-NEXT:    buffer_load_dword v49, v2, s[0:3], 0 offen offset:180
+; CHECK-NEXT:    buffer_load_dword v48, v2, s[0:3], 0 offen offset:176
+; CHECK-NEXT:    buffer_load_dword v55, v2, s[0:3], 0 offen offset:172
+; CHECK-NEXT:    buffer_load_dword v54, v2, s[0:3], 0 offen offset:168
+; CHECK-NEXT:    buffer_load_dword v53, v2, s[0:3], 0 offen offset:164
+; CHECK-NEXT:    buffer_load_dword v52, v2, s[0:3], 0 offen offset:160
+; CHECK-NEXT:    buffer_load_dword v67, v2, s[0:3], 0 offen offset:156
+; CHECK-NEXT:    buffer_load_dword v66, v2, s[0:3], 0 offen offset:152
+; CHECK-NEXT:    buffer_load_dword v65, v2, s[0:3], 0 offen offset:148
+; CHECK-NEXT:    buffer_load_dword v64, v2, s[0:3], 0 offen offset:144
+; CHECK-NEXT:    buffer_load_dword v71, v2, s[0:3], 0 offen offset:140
+; CHECK-NEXT:    buffer_load_dword v70, v2, s[0:3], 0 offen offset:136
+; CHECK-NEXT:    buffer_load_dword v69, v2, s[0:3], 0 offen offset:132
+; CHECK-NEXT:    buffer_load_dword v68, v2, s[0:3], 0 offen offset:128
+; CHECK-NEXT:    buffer_load_dword v83, v2, s[0:3], 0 offen offset:92
+; CHECK-NEXT:    buffer_load_dword v82, v2, s[0:3], 0 offen offset:88
+; CHECK-NEXT:    buffer_load_dword v81, v2, s[0:3], 0 offen offset:84
+; CHECK-NEXT:    buffer_load_dword v80, v2, s[0:3], 0 offen offset:80
+; CHECK-NEXT:    buffer_load_dword v87, v2, s[0:3], 0 offen offset:76
+; CHECK-NEXT:    buffer_load_dword v86, v2, s[0:3], 0 offen offset:72
+; CHECK-NEXT:    buffer_load_dword v85, v2, s[0:3], 0 offen offset:68
+; CHECK-NEXT:    buffer_load_dword v84, v2, s[0:3], 0 offen offset:64
+; CHECK-NEXT:    buffer_load_dword v96, v2, s[0:3], 0 offen
+; CHECK-NEXT:    buffer_load_dword v97, v2, s[0:3], 0 offen offset:4
+; CHECK-NEXT:    buffer_load_dword v98, v2, s[0:3], 0 offen offset:8
+; CHECK-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:16
+; CHECK-NEXT:    buffer_load_dword v99, v2, s[0:3], 0 offen offset:12
+; CHECK-NEXT:    v_add_co_u32 v100, vcc_lo, v0, s4
+; CHECK-NEXT:    v_add_co_ci_u32_e32 v101, vcc_lo, s5, v1, vcc_lo
+; CHECK-NEXT:    v_add_nc_u32_e32 v2, 0xffffff00, v2
+; CHECK-NEXT:    s_add_u32 s4, s4, 0xffffff00
+; CHECK-NEXT:    s_addc_u32 s5, s5, -1
+; CHECK-NEXT:    s_waitcnt vmcnt(41)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[23:26] offset:240
+; CHECK-NEXT:    s_waitcnt vmcnt(37)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[27:30] offset:224
+; CHECK-NEXT:    s_waitcnt vmcnt(33)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[31:34] offset:208
+; CHECK-NEXT:    s_waitcnt vmcnt(29)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[35:38] offset:192
+; CHECK-NEXT:    s_waitcnt vmcnt(25)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[48:51] offset:176
+; CHECK-NEXT:    s_waitcnt vmcnt(21)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[52:55] offset:160
+; CHECK-NEXT:    s_waitcnt vmcnt(17)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[64:67] offset:144
+; CHECK-NEXT:    s_waitcnt vmcnt(13)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[68:71] offset:128
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[15:18] offset:112
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[19:22] offset:96
+; CHECK-NEXT:    s_waitcnt vmcnt(9)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[80:83] offset:80
+; CHECK-NEXT:    s_waitcnt vmcnt(5)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[84:87] offset:64
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[11:14] offset:48
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[7:10] offset:32
+; CHECK-NEXT:    s_waitcnt vmcnt(1)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[3:6] offset:16
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    flat_store_dwordx4 v[100:101], v[96:99]
+; CHECK-NEXT:    s_cmp_eq_u64 s[4:5], s[6:7]
+; CHECK-NEXT:    s_cbranch_scc0 .LBB9_4
+; CHECK-NEXT:  .LBB9_5: ; %Flow11
+; CHECK-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+;
+; ALIGNED-LABEL: memmove_p0_p5_sz2048:
+; ALIGNED:       ; %bb.0: ; %entry
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v40, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v41, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v42, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v44, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v45, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v46, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v47, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v56, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v58, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v59, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v60, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v61, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v62, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v63, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v72, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v73, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v74, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v75, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v79, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v88, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v89, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v92, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v93, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v95, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v105, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v107, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v108, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v109, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v111, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v120, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v121, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v122, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v124, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v127, off, s[0:3], s32 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1]
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:704 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:708 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0
+; ALIGNED-NEXT:    s_mov_b32 s6, exec_lo
+; ALIGNED-NEXT:    v_cndmask_b32_e32 v0, -1, v0, vcc_lo
+; ALIGNED-NEXT:    v_cmpx_ge_u32_e64 v2, v0
+; ALIGNED-NEXT:    s_xor_b32 s6, exec_lo, s6
+; ALIGNED-NEXT:    s_cbranch_execz .LBB9_2
+; ALIGNED-NEXT:  .LBB9_1: ; %memmove_fwd_loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    s_clause 0x39
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:20
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:21
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:22
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:23
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:24
+; ALIGNED-NEXT:    buffer_load_ubyte v10, v2, s[0:3], 0 offen offset:25
+; ALIGNED-NEXT:    buffer_load_ubyte v12, v2, s[0:3], 0 offen offset:26
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:30
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:31
+; ALIGNED-NEXT:    buffer_load_ubyte v14, v2, s[0:3], 0 offen offset:32
+; ALIGNED-NEXT:    buffer_load_ubyte v15, v2, s[0:3], 0 offen offset:33
+; ALIGNED-NEXT:    buffer_load_ubyte v17, v2, s[0:3], 0 offen offset:34
+; ALIGNED-NEXT:    buffer_load_ubyte v9, v2, s[0:3], 0 offen offset:29
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:28
+; ALIGNED-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:27
+; ALIGNED-NEXT:    buffer_load_ubyte v19, v2, s[0:3], 0 offen offset:35
+; ALIGNED-NEXT:    buffer_load_ubyte v13, v2, s[0:3], 0 offen offset:36
+; ALIGNED-NEXT:    buffer_load_ubyte v16, v2, s[0:3], 0 offen offset:37
+; ALIGNED-NEXT:    buffer_load_ubyte v18, v2, s[0:3], 0 offen offset:38
+; ALIGNED-NEXT:    buffer_load_ubyte v20, v2, s[0:3], 0 offen offset:39
+; ALIGNED-NEXT:    buffer_load_ubyte v22, v2, s[0:3], 0 offen offset:40
+; ALIGNED-NEXT:    buffer_load_ubyte v23, v2, s[0:3], 0 offen offset:41
+; ALIGNED-NEXT:    buffer_load_ubyte v25, v2, s[0:3], 0 offen offset:42
+; ALIGNED-NEXT:    buffer_load_ubyte v27, v2, s[0:3], 0 offen offset:43
+; ALIGNED-NEXT:    buffer_load_ubyte v21, v2, s[0:3], 0 offen offset:44
+; ALIGNED-NEXT:    buffer_load_ubyte v24, v2, s[0:3], 0 offen offset:45
+; ALIGNED-NEXT:    buffer_load_ubyte v26, v2, s[0:3], 0 offen offset:46
+; ALIGNED-NEXT:    buffer_load_ubyte v28, v2, s[0:3], 0 offen offset:47
+; ALIGNED-NEXT:    buffer_load_ubyte v29, v2, s[0:3], 0 offen offset:48
+; ALIGNED-NEXT:    buffer_load_ubyte v30, v2, s[0:3], 0 offen offset:49
+; ALIGNED-NEXT:    buffer_load_ubyte v34, v2, s[0:3], 0 offen offset:50
+; ALIGNED-NEXT:    buffer_load_ubyte v32, v2, s[0:3], 0 offen offset:51
+; ALIGNED-NEXT:    buffer_load_ubyte v31, v2, s[0:3], 0 offen offset:52
+; ALIGNED-NEXT:    buffer_load_ubyte v36, v2, s[0:3], 0 offen offset:53
+; ALIGNED-NEXT:    buffer_load_ubyte v33, v2, s[0:3], 0 offen offset:54
+; ALIGNED-NEXT:    buffer_load_ubyte v35, v2, s[0:3], 0 offen offset:55
+; ALIGNED-NEXT:    buffer_load_ubyte v49, v2, s[0:3], 0 offen offset:56
+; ALIGNED-NEXT:    buffer_load_ubyte v50, v2, s[0:3], 0 offen offset:57
+; ALIGNED-NEXT:    buffer_load_ubyte v52, v2, s[0:3], 0 offen offset:58
+; ALIGNED-NEXT:    buffer_load_ubyte v38, v2, s[0:3], 0 offen offset:62
+; ALIGNED-NEXT:    buffer_load_ubyte v39, v2, s[0:3], 0 offen offset:63
+; ALIGNED-NEXT:    buffer_load_ubyte v53, v2, s[0:3], 0 offen offset:64
+; ALIGNED-NEXT:    buffer_load_ubyte v54, v2, s[0:3], 0 offen offset:65
+; ALIGNED-NEXT:    buffer_load_ubyte v65, v2, s[0:3], 0 offen offset:66
+; ALIGNED-NEXT:    buffer_load_ubyte v48, v2, s[0:3], 0 offen offset:61
+; ALIGNED-NEXT:    buffer_load_ubyte v37, v2, s[0:3], 0 offen offset:60
+; ALIGNED-NEXT:    buffer_load_ubyte v51, v2, s[0:3], 0 offen offset:59
+; ALIGNED-NEXT:    buffer_load_ubyte v55, v2, s[0:3], 0 offen offset:67
+; ALIGNED-NEXT:    buffer_load_ubyte v64, v2, s[0:3], 0 offen offset:68
+; ALIGNED-NEXT:    buffer_load_ubyte v66, v2, s[0:3], 0 offen offset:69
+; ALIGNED-NEXT:    buffer_load_ubyte v67, v2, s[0:3], 0 offen offset:70
+; ALIGNED-NEXT:    buffer_load_ubyte v68, v2, s[0:3], 0 offen offset:71
+; ALIGNED-NEXT:    buffer_load_ubyte v69, v2, s[0:3], 0 offen offset:76
+; ALIGNED-NEXT:    buffer_load_ubyte v70, v2, s[0:3], 0 offen offset:77
+; ALIGNED-NEXT:    buffer_load_ubyte v71, v2, s[0:3], 0 offen offset:78
+; ALIGNED-NEXT:    buffer_load_ubyte v80, v2, s[0:3], 0 offen offset:79
+; ALIGNED-NEXT:    buffer_load_ubyte v127, v2, s[0:3], 0 offen offset:19
+; ALIGNED-NEXT:    buffer_load_ubyte v81, v2, s[0:3], 0 offen offset:75
+; ALIGNED-NEXT:    s_waitcnt vmcnt(57)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(56)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:716 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(55)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:720 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(54)
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:724 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(53)
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:732 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(52)
+; ALIGNED-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:748 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(51)
+; ALIGNED-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:756 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(50)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:740 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(49)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:744 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(48)
+; ALIGNED-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:768 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
+; ALIGNED-NEXT:    s_waitcnt vmcnt(45)
+; ALIGNED-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:736 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(44)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:728 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(43)
+; ALIGNED-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:752 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v9, 8, v5
+; ALIGNED-NEXT:    s_waitcnt vmcnt(41)
+; ALIGNED-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:760 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v8, 8, v6
+; ALIGNED-NEXT:    v_lshl_or_b32 v5, v10, 8, v7
+; ALIGNED-NEXT:    v_lshl_or_b32 v6, v11, 8, v12
+; ALIGNED-NEXT:    v_lshl_or_b32 v7, v15, 8, v14
+; ALIGNED-NEXT:    v_lshl_or_b32 v8, v19, 8, v17
+; ALIGNED-NEXT:    s_waitcnt vmcnt(40)
+; ALIGNED-NEXT:    v_lshl_or_b32 v9, v16, 8, v13
+; ALIGNED-NEXT:    s_waitcnt vmcnt(38)
+; ALIGNED-NEXT:    v_lshl_or_b32 v10, v20, 8, v18
+; ALIGNED-NEXT:    s_waitcnt vmcnt(36)
+; ALIGNED-NEXT:    v_lshl_or_b32 v11, v23, 8, v22
+; ALIGNED-NEXT:    s_waitcnt vmcnt(34)
+; ALIGNED-NEXT:    v_lshl_or_b32 v12, v27, 8, v25
+; ALIGNED-NEXT:    s_waitcnt vmcnt(32)
+; ALIGNED-NEXT:    v_lshl_or_b32 v13, v24, 8, v21
+; ALIGNED-NEXT:    s_waitcnt vmcnt(30)
+; ALIGNED-NEXT:    v_lshl_or_b32 v14, v28, 8, v26
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v6, 16, v5
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v8, 16, v7
+; ALIGNED-NEXT:    v_lshl_or_b32 v5, v10, 16, v9
+; ALIGNED-NEXT:    v_lshl_or_b32 v6, v12, 16, v11
+; ALIGNED-NEXT:    v_lshl_or_b32 v7, v14, 16, v13
+; ALIGNED-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:772 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(28)
+; ALIGNED-NEXT:    v_lshl_or_b32 v15, v30, 8, v29
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:824 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(26)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v32, 8, v34
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:832 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(24)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v36, 8, v31
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:840 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(22)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v35, 8, v33
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:852 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:868 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(12)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v48, 8, v37
+; ALIGNED-NEXT:    v_lshl_or_b32 v5, v39, 8, v38
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:876 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v6, v50, 8, v49
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:880 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(11)
+; ALIGNED-NEXT:    v_lshl_or_b32 v7, v51, 8, v52
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v0, 16, v15
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 16, v1
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v5, 16, v4
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:85
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v7, 16, v6
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:916 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v54, 8, v53
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:924 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(11)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v55, 8, v65
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:932 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(9)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v66, 8, v64
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:948 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v68, 8, v67
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:86
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:82
+; ALIGNED-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:784 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:972 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v70, 8, v69
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:83
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:74
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:984 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v80, 8, v71
+; ALIGNED-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:788 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:764 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:776 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:780 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:73
+; ALIGNED-NEXT:    buffer_store_dword v22, off, s[0:3], s32 offset:800 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v23, off, s[0:3], s32 offset:804 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v25, off, s[0:3], s32 offset:816 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:996 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:72
+; ALIGNED-NEXT:    buffer_store_dword v27, off, s[0:3], s32 offset:820 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v21, off, s[0:3], s32 offset:792 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v24, off, s[0:3], s32 offset:796 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v26, off, s[0:3], s32 offset:808 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v28, off, s[0:3], s32 offset:812 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v29, off, s[0:3], s32 offset:828 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:836 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:856 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:848 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:844 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:872 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:860 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:864 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v38, off, s[0:3], s32 offset:888 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:900 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v48, off, s[0:3], s32 offset:896 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:884 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v49, off, s[0:3], s32 offset:892 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v50, off, s[0:3], s32 offset:904 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:912 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v51, off, s[0:3], s32 offset:908 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:920 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v54, off, s[0:3], s32 offset:928 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v65, off, s[0:3], s32 offset:944 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v55, off, s[0:3], s32 offset:936 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v64, off, s[0:3], s32 offset:940 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v66, off, s[0:3], s32 offset:952 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v67, off, s[0:3], s32 offset:956 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v68, off, s[0:3], s32 offset:960 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v69, off, s[0:3], s32 offset:964 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v70, off, s[0:3], s32 offset:968 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v71, off, s[0:3], s32 offset:976 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v80, off, s[0:3], s32 offset:980 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    buffer_store_dword v81, off, s[0:3], s32 offset:1000 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v127, off, s[0:3], s32 offset:1404 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:87
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1032 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:1036 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:1024 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:1020 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1004 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:992 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:988 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v81, 8, v3
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:84
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:81
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1008 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:80
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:1040 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1016 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1012 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v7
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:98
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v8, 8, v6
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:102
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:103
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1044 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:94
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:95
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:93
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:91
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1052 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:92
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:1116 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:1120 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1060 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1064 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1056 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1048 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v5, 8, v3
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:90
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:1080 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:101
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:89
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1076 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:88
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1084 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1112 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1072 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1068 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:99
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:100
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:1104 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:97
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1088 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:96
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:1100 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1108 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1096 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1092 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v7
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:114
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v8, 8, v6
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:118
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:119
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1124 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:110
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:111
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:109
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:107
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1132 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:108
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:1196 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:1200 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1140 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1144 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1136 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1128 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v5, 8, v3
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:106
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:1160 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:117
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:105
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1156 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:104
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1164 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1192 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1152 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1148 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:115
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:116
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:1188 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:113
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1168 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:1180 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1184 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1176 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1172 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v7
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:130
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v8, 8, v6
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:134
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:135
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1204 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:126
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:127
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:125
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:123
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1212 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:124
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:1280 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:1288 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1220 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1224 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1216 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1208 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v5, 8, v3
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:122
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:1240 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:133
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:121
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1232 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:120
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1244 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1276 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1236 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1228 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:131
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:132
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:1272 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:129
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1248 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:128
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:1264 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1268 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1256 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1252 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v7
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v8, 8, v6
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1312 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v3
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:142
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:143
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:141
+; ALIGNED-NEXT:    buffer_load_ubyte v4, v2, s[0:3], 0 offen offset:139
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1328 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:140
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1336 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1340 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1332 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1324 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v5, 8, v3
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:138
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:1364 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:137
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1352 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:136
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1376 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1356 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1344 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v4, 8, v3
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1384 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:144
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1396 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x3
+; ALIGNED-NEXT:    buffer_load_ubyte v124, v2, s[0:3], 0 offen offset:145
+; ALIGNED-NEXT:    buffer_load_ubyte v111, v2, s[0:3], 0 offen offset:146
+; ALIGNED-NEXT:    buffer_load_ubyte v120, v2, s[0:3], 0 offen offset:147
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:148
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v124, 8, v0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v120, 8, v111
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1400 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v121, v2, s[0:3], 0 offen offset:149
+; ALIGNED-NEXT:    buffer_load_ubyte v122, v2, s[0:3], 0 offen offset:150
+; ALIGNED-NEXT:    buffer_load_ubyte v109, v2, s[0:3], 0 offen offset:151
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1408 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v121, 8, v3
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v109, 8, v122
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1412 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v108, v2, s[0:3], 0 offen offset:156
+; ALIGNED-NEXT:    buffer_load_ubyte v105, v2, s[0:3], 0 offen offset:157
+; ALIGNED-NEXT:    buffer_load_ubyte v107, v2, s[0:3], 0 offen offset:158
+; ALIGNED-NEXT:    buffer_load_ubyte v104, v2, s[0:3], 0 offen offset:159
+; ALIGNED-NEXT:    buffer_load_ubyte v95, v2, s[0:3], 0 offen offset:155
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v105, 8, v108
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v104, 8, v107
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1416 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v93, v2, s[0:3], 0 offen offset:152
+; ALIGNED-NEXT:    buffer_load_ubyte v92, v2, s[0:3], 0 offen offset:153
+; ALIGNED-NEXT:    buffer_load_ubyte v90, v2, s[0:3], 0 offen offset:154
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v92, 8, v93
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v95, 8, v90
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1420 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v89, v2, s[0:3], 0 offen offset:160
+; ALIGNED-NEXT:    buffer_load_ubyte v79, v2, s[0:3], 0 offen offset:161
+; ALIGNED-NEXT:    buffer_load_ubyte v73, v2, s[0:3], 0 offen offset:162
+; ALIGNED-NEXT:    buffer_load_ubyte v74, v2, s[0:3], 0 offen offset:163
+; ALIGNED-NEXT:    buffer_load_ubyte v88, v2, s[0:3], 0 offen offset:164
+; ALIGNED-NEXT:    buffer_load_ubyte v75, v2, s[0:3], 0 offen offset:165
+; ALIGNED-NEXT:    buffer_load_ubyte v76, v2, s[0:3], 0 offen offset:166
+; ALIGNED-NEXT:    buffer_load_ubyte v72, v2, s[0:3], 0 offen offset:167
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v79, 8, v89
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v74, 8, v73
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v72, 8, v76
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1424 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v75, 8, v88
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1428 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v63, v2, s[0:3], 0 offen offset:172
+; ALIGNED-NEXT:    buffer_load_ubyte v61, v2, s[0:3], 0 offen offset:173
+; ALIGNED-NEXT:    buffer_load_ubyte v62, v2, s[0:3], 0 offen offset:174
+; ALIGNED-NEXT:    buffer_load_ubyte v60, v2, s[0:3], 0 offen offset:175
+; ALIGNED-NEXT:    buffer_load_ubyte v58, v2, s[0:3], 0 offen offset:171
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v61, 8, v63
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v60, 8, v62
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1432 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v59, v2, s[0:3], 0 offen offset:168
+; ALIGNED-NEXT:    buffer_load_ubyte v57, v2, s[0:3], 0 offen offset:169
+; ALIGNED-NEXT:    buffer_load_ubyte v56, v2, s[0:3], 0 offen offset:170
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v57, 8, v59
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v58, 8, v56
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1436 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v47, v2, s[0:3], 0 offen offset:176
+; ALIGNED-NEXT:    buffer_load_ubyte v45, v2, s[0:3], 0 offen offset:177
+; ALIGNED-NEXT:    buffer_load_ubyte v41, v2, s[0:3], 0 offen offset:178
+; ALIGNED-NEXT:    buffer_load_ubyte v42, v2, s[0:3], 0 offen offset:179
+; ALIGNED-NEXT:    buffer_load_ubyte v46, v2, s[0:3], 0 offen offset:180
+; ALIGNED-NEXT:    buffer_load_ubyte v43, v2, s[0:3], 0 offen offset:181
+; ALIGNED-NEXT:    buffer_load_ubyte v44, v2, s[0:3], 0 offen offset:182
+; ALIGNED-NEXT:    buffer_load_ubyte v40, v2, s[0:3], 0 offen offset:183
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v45, 8, v47
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v42, 8, v41
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v40, 8, v44
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1440 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v43, 8, v46
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1444 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v119, v2, s[0:3], 0 offen offset:188
+; ALIGNED-NEXT:    buffer_load_ubyte v117, v2, s[0:3], 0 offen offset:189
+; ALIGNED-NEXT:    buffer_load_ubyte v118, v2, s[0:3], 0 offen offset:190
+; ALIGNED-NEXT:    buffer_load_ubyte v116, v2, s[0:3], 0 offen offset:191
+; ALIGNED-NEXT:    buffer_load_ubyte v114, v2, s[0:3], 0 offen offset:187
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v117, 8, v119
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v116, 8, v118
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1448 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v115, v2, s[0:3], 0 offen offset:184
+; ALIGNED-NEXT:    buffer_load_ubyte v113, v2, s[0:3], 0 offen offset:185
+; ALIGNED-NEXT:    buffer_load_ubyte v112, v2, s[0:3], 0 offen offset:186
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v113, 8, v115
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v114, 8, v112
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1452 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v102, v2, s[0:3], 0 offen offset:192
+; ALIGNED-NEXT:    buffer_load_ubyte v100, v2, s[0:3], 0 offen offset:193
+; ALIGNED-NEXT:    buffer_load_ubyte v97, v2, s[0:3], 0 offen offset:194
+; ALIGNED-NEXT:    buffer_load_ubyte v96, v2, s[0:3], 0 offen offset:195
+; ALIGNED-NEXT:    buffer_load_ubyte v101, v2, s[0:3], 0 offen offset:196
+; ALIGNED-NEXT:    buffer_load_ubyte v99, v2, s[0:3], 0 offen offset:197
+; ALIGNED-NEXT:    buffer_load_ubyte v98, v2, s[0:3], 0 offen offset:198
+; ALIGNED-NEXT:    buffer_load_ubyte v87, v2, s[0:3], 0 offen offset:199
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v100, 8, v102
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v96, 8, v97
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v87, 8, v98
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1456 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v99, 8, v101
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1460 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v85, v2, s[0:3], 0 offen offset:204
+; ALIGNED-NEXT:    buffer_load_ubyte v83, v2, s[0:3], 0 offen offset:205
+; ALIGNED-NEXT:    buffer_load_ubyte v84, v2, s[0:3], 0 offen offset:206
+; ALIGNED-NEXT:    buffer_load_ubyte v82, v2, s[0:3], 0 offen offset:207
+; ALIGNED-NEXT:    buffer_load_ubyte v81, v2, s[0:3], 0 offen offset:203
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v83, 8, v85
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v82, 8, v84
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1464 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v80, v2, s[0:3], 0 offen offset:200
+; ALIGNED-NEXT:    buffer_load_ubyte v70, v2, s[0:3], 0 offen offset:201
+; ALIGNED-NEXT:    buffer_load_ubyte v69, v2, s[0:3], 0 offen offset:202
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v70, 8, v80
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v81, 8, v69
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1468 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v68, v2, s[0:3], 0 offen offset:212
+; ALIGNED-NEXT:    buffer_load_ubyte v54, v2, s[0:3], 0 offen offset:213
+; ALIGNED-NEXT:    buffer_load_ubyte v66, v2, s[0:3], 0 offen offset:214
+; ALIGNED-NEXT:    buffer_load_ubyte v53, v2, s[0:3], 0 offen offset:215
+; ALIGNED-NEXT:    buffer_load_ubyte v55, v2, s[0:3], 0 offen offset:211
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v54, 8, v68
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v53, 8, v66
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1472 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v67, v2, s[0:3], 0 offen offset:216
+; ALIGNED-NEXT:    buffer_load_ubyte v64, v2, s[0:3], 0 offen offset:217
+; ALIGNED-NEXT:    buffer_load_ubyte v49, v2, s[0:3], 0 offen offset:218
+; ALIGNED-NEXT:    buffer_load_ubyte v50, v2, s[0:3], 0 offen offset:219
+; ALIGNED-NEXT:    buffer_load_ubyte v65, v2, s[0:3], 0 offen offset:220
+; ALIGNED-NEXT:    buffer_load_ubyte v51, v2, s[0:3], 0 offen offset:221
+; ALIGNED-NEXT:    buffer_load_ubyte v52, v2, s[0:3], 0 offen offset:222
+; ALIGNED-NEXT:    buffer_load_ubyte v48, v2, s[0:3], 0 offen offset:223
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v64, 8, v67
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v50, 8, v49
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v51, 8, v65
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v48, 8, v52
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1476 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1480 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v39, v2, s[0:3], 0 offen offset:208
+; ALIGNED-NEXT:    buffer_load_ubyte v37, v2, s[0:3], 0 offen offset:209
+; ALIGNED-NEXT:    buffer_load_ubyte v38, v2, s[0:3], 0 offen offset:210
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v37, 8, v39
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v55, 8, v38
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1484 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v36, v2, s[0:3], 0 offen offset:224
+; ALIGNED-NEXT:    buffer_load_ubyte v34, v2, s[0:3], 0 offen offset:225
+; ALIGNED-NEXT:    buffer_load_ubyte v31, v2, s[0:3], 0 offen offset:226
+; ALIGNED-NEXT:    buffer_load_ubyte v30, v2, s[0:3], 0 offen offset:227
+; ALIGNED-NEXT:    buffer_load_ubyte v35, v2, s[0:3], 0 offen offset:228
+; ALIGNED-NEXT:    buffer_load_ubyte v33, v2, s[0:3], 0 offen offset:229
+; ALIGNED-NEXT:    buffer_load_ubyte v32, v2, s[0:3], 0 offen offset:230
+; ALIGNED-NEXT:    buffer_load_ubyte v29, v2, s[0:3], 0 offen offset:231
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v34, 8, v36
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v30, 8, v31
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v4, 16, v3
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v33, 8, v35
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v29, 8, v32
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1488 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x17
+; ALIGNED-NEXT:    buffer_load_ubyte v28, v2, s[0:3], 0 offen offset:236
+; ALIGNED-NEXT:    buffer_load_ubyte v27, v2, s[0:3], 0 offen offset:237
+; ALIGNED-NEXT:    buffer_load_ubyte v26, v2, s[0:3], 0 offen offset:238
+; ALIGNED-NEXT:    buffer_load_ubyte v25, v2, s[0:3], 0 offen offset:239
+; ALIGNED-NEXT:    buffer_load_ubyte v23, v2, s[0:3], 0 offen offset:235
+; ALIGNED-NEXT:    buffer_load_ubyte v24, v2, s[0:3], 0 offen offset:232
+; ALIGNED-NEXT:    buffer_load_ubyte v22, v2, s[0:3], 0 offen offset:233
+; ALIGNED-NEXT:    buffer_load_ubyte v21, v2, s[0:3], 0 offen offset:234
+; ALIGNED-NEXT:    buffer_load_ubyte v20, v2, s[0:3], 0 offen offset:240
+; ALIGNED-NEXT:    buffer_load_ubyte v18, v2, s[0:3], 0 offen offset:241
+; ALIGNED-NEXT:    buffer_load_ubyte v15, v2, s[0:3], 0 offen offset:242
+; ALIGNED-NEXT:    buffer_load_ubyte v14, v2, s[0:3], 0 offen offset:243
+; ALIGNED-NEXT:    buffer_load_ubyte v19, v2, s[0:3], 0 offen offset:244
+; ALIGNED-NEXT:    buffer_load_ubyte v17, v2, s[0:3], 0 offen offset:245
+; ALIGNED-NEXT:    buffer_load_ubyte v16, v2, s[0:3], 0 offen offset:246
+; ALIGNED-NEXT:    buffer_load_ubyte v13, v2, s[0:3], 0 offen offset:247
+; ALIGNED-NEXT:    buffer_load_ubyte v12, v2, s[0:3], 0 offen offset:252
+; ALIGNED-NEXT:    buffer_load_ubyte v11, v2, s[0:3], 0 offen offset:253
+; ALIGNED-NEXT:    buffer_load_ubyte v10, v2, s[0:3], 0 offen offset:254
+; ALIGNED-NEXT:    buffer_load_ubyte v9, v2, s[0:3], 0 offen offset:255
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v2, s[0:3], 0 offen offset:251
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v2, s[0:3], 0 offen offset:248
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v2, s[0:3], 0 offen offset:249
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v2, s[0:3], 0 offen offset:250
+; ALIGNED-NEXT:    v_lshl_or_b32 v110, v4, 16, v3
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:3
+; ALIGNED-NEXT:    buffer_load_ubyte v106, v2, s[0:3], 0 offen offset:4
+; ALIGNED-NEXT:    buffer_load_ubyte v123, v2, s[0:3], 0 offen offset:5
+; ALIGNED-NEXT:    buffer_load_ubyte v125, v2, s[0:3], 0 offen offset:6
+; ALIGNED-NEXT:    s_waitcnt vmcnt(27)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v27, 8, v28
+; ALIGNED-NEXT:    s_waitcnt vmcnt(25)
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v25, 8, v26
+; ALIGNED-NEXT:    s_waitcnt vmcnt(13)
+; ALIGNED-NEXT:    v_lshl_or_b32 v77, v13, 8, v16
+; ALIGNED-NEXT:    s_waitcnt vmcnt(9)
+; ALIGNED-NEXT:    v_lshl_or_b32 v91, v9, 8, v10
+; ALIGNED-NEXT:    v_lshl_or_b32 v94, v4, 16, v3
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v22, 8, v24
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v23, 8, v21
+; ALIGNED-NEXT:    v_lshl_or_b32 v78, v4, 16, v3
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v18, 8, v20
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v14, 8, v15
+; ALIGNED-NEXT:    v_lshl_or_b32 v103, v4, 16, v3
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:7
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v17, 8, v19
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1292 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v106, off, s[0:3], s32 offset:1296 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:1304 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v125, off, s[0:3], s32 offset:1308 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v86, v77, 16, v4
+; ALIGNED-NEXT:    v_lshl_or_b32 v77, v11, 8, v12
+; ALIGNED-NEXT:    v_lshl_or_b32 v71, v91, 16, v77
+; ALIGNED-NEXT:    v_lshl_or_b32 v77, v6, 8, v8
+; ALIGNED-NEXT:    v_lshl_or_b32 v91, v7, 8, v5
+; ALIGNED-NEXT:    v_lshl_or_b32 v4, v91, 16, v77
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v77, v2, s[0:3], 0 offen offset:1
+; ALIGNED-NEXT:    buffer_load_ubyte v91, v2, s[0:3], 0 offen offset:2
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1260 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1320 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v77, off, s[0:3], s32 offset:1284 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:1300 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v77, v77, 8, v1
+; ALIGNED-NEXT:    v_lshl_or_b32 v91, v0, 8, v91
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:12
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v91, 16, v77
+; ALIGNED-NEXT:    v_lshl_or_b32 v77, v123, 8, v106
+; ALIGNED-NEXT:    v_lshl_or_b32 v91, v3, 8, v125
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v2, s[0:3], 0 offen offset:13
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1316 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v91, 16, v77
+; ALIGNED-NEXT:    buffer_load_ubyte v91, v2, s[0:3], 0 offen offset:15
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1348 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v2, s[0:3], 0 offen offset:14
+; ALIGNED-NEXT:    buffer_load_ubyte v126, v2, s[0:3], 0 offen offset:11
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1360 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v77, v3, 8, v1
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1380 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:1372 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v91, v91, 8, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1368 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v91, 16, v77
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1388 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v2, s[0:3], 0 offen offset:8
+; ALIGNED-NEXT:    buffer_load_ubyte v125, v2, s[0:3], 0 offen offset:9
+; ALIGNED-NEXT:    buffer_load_ubyte v123, v2, s[0:3], 0 offen offset:10
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v77, v125, 8, v1
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v91, v126, 8, v123
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v91, 16, v77
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1392 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v106, v2, s[0:3], 0 offen offset:16
+; ALIGNED-NEXT:    buffer_load_ubyte v77, v2, s[0:3], 0 offen offset:18
+; ALIGNED-NEXT:    buffer_load_ubyte v91, v2, s[0:3], 0 offen offset:17
+; ALIGNED-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:232
+; ALIGNED-NEXT:    buffer_store_dword v71, off, s[0:3], s32 offset:236
+; ALIGNED-NEXT:    buffer_store_dword v86, off, s[0:3], s32 offset:228
+; ALIGNED-NEXT:    buffer_store_dword v103, off, s[0:3], s32 offset:224
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:704
+; ALIGNED-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:708
+; ALIGNED-NEXT:    v_add_nc_u32_e32 v2, 0x100, v2
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v127, 8, v77
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshl_or_b32 v127, v91, 8, v106
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_add_co_u32 v3, vcc_lo, v3, s4
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, s5, v4, vcc_lo
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v5 offset:250
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v7 offset:251
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v6 offset:249
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v9 offset:255
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v11 offset:253
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v10 offset:254
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v12 offset:252
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v8 offset:248
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v15 offset:242
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v14 offset:243
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v18 offset:241
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v13 offset:247
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v17 offset:245
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v16 offset:246
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v19 offset:244
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v20 offset:240
+; ALIGNED-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:248
+; ALIGNED-NEXT:    buffer_store_dword v94, off, s[0:3], s32 offset:252
+; ALIGNED-NEXT:    buffer_store_dword v110, off, s[0:3], s32 offset:244
+; ALIGNED-NEXT:    v_lshl_or_b32 v127, v0, 16, v127
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1488 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0x100
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, 0
+; ALIGNED-NEXT:    s_cmp_lg_u64 s[4:5], 0x800
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:240
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v21 offset:234
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v23 offset:235
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v22 offset:233
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v25 offset:239
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v27 offset:237
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v26 offset:238
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v28 offset:236
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v24 offset:232
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v31 offset:226
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v30 offset:227
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v34 offset:225
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v29 offset:231
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v33 offset:229
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v32 offset:230
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v35 offset:228
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v36 offset:224
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1484 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:192
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1480 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:204
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1476 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:200
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1472 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:196
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v54 offset:213
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v53 offset:215
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v37 offset:209
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v55 offset:211
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v38 offset:210
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v66 offset:214
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v68 offset:212
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v49 offset:218
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v50 offset:219
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v64 offset:217
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v48 offset:223
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v51 offset:221
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v52 offset:222
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v65 offset:220
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v67 offset:216
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v39 offset:208
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1468 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:216
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1464 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:220
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1460 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:212
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1456 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:208
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v69 offset:202
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v81 offset:203
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v70 offset:201
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v82 offset:207
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v83 offset:205
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v84 offset:206
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v85 offset:204
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v80 offset:200
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v97 offset:194
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v96 offset:195
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v100 offset:193
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v87 offset:199
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v99 offset:197
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v98 offset:198
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v101 offset:196
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v102 offset:192
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1452 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:296
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1448 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:300
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1444 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:292
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1440 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:288
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v112 offset:186
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v114 offset:187
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v113 offset:185
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v116 offset:191
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v117 offset:189
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v118 offset:190
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v119 offset:188
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v115 offset:184
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v41 offset:178
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v42 offset:179
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v45 offset:177
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v40 offset:183
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v43 offset:181
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v44 offset:182
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v46 offset:180
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v47 offset:176
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1436 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:312
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1432 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:316
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1428 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:308
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1424 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:304
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v56 offset:170
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v58 offset:171
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v57 offset:169
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v60 offset:175
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v61 offset:173
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v62 offset:174
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v63 offset:172
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v59 offset:168
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v73 offset:162
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v74 offset:163
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v79 offset:161
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v72 offset:167
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v75 offset:165
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v76 offset:166
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v88 offset:164
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v89 offset:160
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1420 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:264
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1416 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:268
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1412 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:260
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1408 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:256
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v90 offset:154
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v95 offset:155
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v92 offset:153
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v104 offset:159
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v105 offset:157
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v107 offset:158
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v108 offset:156
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v93 offset:152
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v111 offset:146
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v120 offset:147
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v124 offset:145
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v109 offset:151
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v121 offset:149
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v122 offset:150
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1400 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:148
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1396 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:144
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1384 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:280
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1352 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:284
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1328 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:276
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1312 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:272
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1376 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:138
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1364 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:139
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1356 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:137
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1340 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:143
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1332 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:141
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1336 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:142
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1324 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:140
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1344 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:136
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1272 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:130
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1264 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:131
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1256 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:129
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1288 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:135
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1276 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:133
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1280 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:134
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1268 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:132
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1252 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:128
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1248 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:360
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1232 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:364
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1212 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:356
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1204 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:352
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1244 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:122
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1240 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:123
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1236 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:121
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1224 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:127
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1216 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:125
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1220 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:126
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1208 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:124
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1228 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:120
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1188 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:114
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1180 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:115
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1176 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:113
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1200 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:119
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1192 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:117
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1196 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:118
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1184 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:116
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1172 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:112
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1168 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:376
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1156 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:380
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1132 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:372
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1124 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:368
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1164 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:106
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1160 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:107
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1152 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:105
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1144 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:111
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1136 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:109
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1140 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:110
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1128 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:108
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1148 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:104
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1104 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:98
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1100 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:99
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1096 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:97
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1120 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:103
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1112 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:101
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1116 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:102
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1108 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:100
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1092 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:96
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1088 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:328
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1076 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:332
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1052 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:324
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1044 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:320
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1084 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:90
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1080 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:91
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1072 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:89
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1064 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:95
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1056 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:93
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1060 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:94
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1048 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:92
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1068 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:88
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1024 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:82
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1020 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:83
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1016 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:81
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1040 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:87
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1032 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:85
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1036 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:86
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:84
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1012 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:80
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1008 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:344
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:996 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:348
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:340
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:972 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:336
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1004 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:74
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1000 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:75
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:992 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:73
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:980 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:79
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:968 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:77
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:976 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:78
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:964 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:76
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:72
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:944 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:66
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:936 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:67
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:928 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:65
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:960 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:71
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:952 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:69
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:956 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:70
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:940 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:68
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:920 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:64
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:948 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:424
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:932 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:428
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:924 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:420
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:916 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:416
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:896 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:61
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:912 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:58
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:908 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:59
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:904 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:57
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:900 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:63
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:888 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:62
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:884 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:60
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:892 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:56
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:872 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:53
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:856 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:50
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:848 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:51
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:836 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:49
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:864 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:55
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:860 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:54
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:844 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:52
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:828 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:48
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:880 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:444
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:876 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:440
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:868 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:436
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:852 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:432
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:820 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:43
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:816 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:42
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:804 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:41
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:800 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:40
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:812 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:47
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:808 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:46
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:796 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:45
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:792 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:44
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:788 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:35
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:784 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:34
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:772 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:33
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:768 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:32
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:780 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:39
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:776 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:38
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:764 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:37
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:760 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:36
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:840 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:392
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:832 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:396
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:824 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:388
+; ALIGNED-NEXT:    buffer_store_dword v127, off, s[0:3], s32 offset:384
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:756 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:26
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:752 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:27
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:748 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:25
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:744 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:31
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:736 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:29
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:740 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:30
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:728 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:28
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:732 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:24
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v77 offset:18
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1404 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:19
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v91 offset:17
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:724 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:23
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:716 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:21
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:720 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:22
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:20
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v106 offset:16
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1392 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:408
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1388 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:412
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1348 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:404
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1316 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:400
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v123 offset:10
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v126 offset:11
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1380 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:13
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v125 offset:9
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1372 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:15
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1368 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:14
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1360 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:12
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v1 offset:8
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1300 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:2
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1292 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:3
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1284 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:1
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1320 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:7
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1304 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:5
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1308 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:6
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1296 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0 offset:4
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1260 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[3:4], v0
+; ALIGNED-NEXT:    s_cbranch_scc1 .LBB9_1
+; ALIGNED-NEXT:  .LBB9_2: ; %Flow10
+; ALIGNED-NEXT:    s_andn2_saveexec_b32 s8, s6
+; ALIGNED-NEXT:    s_cbranch_execz .LBB9_5
+; ALIGNED-NEXT:  ; %bb.3: ; %memmove_bwd_loop.preheader
+; ALIGNED-NEXT:    v_add_nc_u32_e32 v4, 0x700, v2
+; ALIGNED-NEXT:    s_movk_i32 s6, 0xff00
+; ALIGNED-NEXT:    s_mov_b64 s[4:5], 0x700
+; ALIGNED-NEXT:    s_mov_b32 s7, -1
+; ALIGNED-NEXT:  .LBB9_4: ; %memmove_bwd_loop
+; ALIGNED-NEXT:    ; =>This Inner Loop Header: Depth=1
+; ALIGNED-NEXT:    s_clause 0x3a
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:20
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:21
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:22
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:23
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:24
+; ALIGNED-NEXT:    buffer_load_ubyte v10, v4, s[0:3], 0 offen offset:25
+; ALIGNED-NEXT:    buffer_load_ubyte v12, v4, s[0:3], 0 offen offset:26
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:30
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v4, s[0:3], 0 offen offset:31
+; ALIGNED-NEXT:    buffer_load_ubyte v14, v4, s[0:3], 0 offen offset:32
+; ALIGNED-NEXT:    buffer_load_ubyte v15, v4, s[0:3], 0 offen offset:33
+; ALIGNED-NEXT:    buffer_load_ubyte v17, v4, s[0:3], 0 offen offset:34
+; ALIGNED-NEXT:    buffer_load_ubyte v9, v4, s[0:3], 0 offen offset:29
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:28
+; ALIGNED-NEXT:    buffer_load_ubyte v11, v4, s[0:3], 0 offen offset:27
+; ALIGNED-NEXT:    buffer_load_ubyte v19, v4, s[0:3], 0 offen offset:35
+; ALIGNED-NEXT:    buffer_load_ubyte v13, v4, s[0:3], 0 offen offset:36
+; ALIGNED-NEXT:    buffer_load_ubyte v16, v4, s[0:3], 0 offen offset:37
+; ALIGNED-NEXT:    buffer_load_ubyte v18, v4, s[0:3], 0 offen offset:38
+; ALIGNED-NEXT:    buffer_load_ubyte v20, v4, s[0:3], 0 offen offset:39
+; ALIGNED-NEXT:    buffer_load_ubyte v22, v4, s[0:3], 0 offen offset:40
+; ALIGNED-NEXT:    buffer_load_ubyte v23, v4, s[0:3], 0 offen offset:41
+; ALIGNED-NEXT:    buffer_load_ubyte v25, v4, s[0:3], 0 offen offset:42
+; ALIGNED-NEXT:    buffer_load_ubyte v28, v4, s[0:3], 0 offen offset:43
+; ALIGNED-NEXT:    buffer_load_ubyte v21, v4, s[0:3], 0 offen offset:44
+; ALIGNED-NEXT:    buffer_load_ubyte v24, v4, s[0:3], 0 offen offset:45
+; ALIGNED-NEXT:    buffer_load_ubyte v26, v4, s[0:3], 0 offen offset:46
+; ALIGNED-NEXT:    buffer_load_ubyte v27, v4, s[0:3], 0 offen offset:47
+; ALIGNED-NEXT:    buffer_load_ubyte v30, v4, s[0:3], 0 offen offset:48
+; ALIGNED-NEXT:    buffer_load_ubyte v31, v4, s[0:3], 0 offen offset:49
+; ALIGNED-NEXT:    buffer_load_ubyte v33, v4, s[0:3], 0 offen offset:50
+; ALIGNED-NEXT:    buffer_load_ubyte v34, v4, s[0:3], 0 offen offset:51
+; ALIGNED-NEXT:    buffer_load_ubyte v32, v4, s[0:3], 0 offen offset:52
+; ALIGNED-NEXT:    buffer_load_ubyte v37, v4, s[0:3], 0 offen offset:53
+; ALIGNED-NEXT:    buffer_load_ubyte v35, v4, s[0:3], 0 offen offset:54
+; ALIGNED-NEXT:    buffer_load_ubyte v36, v4, s[0:3], 0 offen offset:55
+; ALIGNED-NEXT:    buffer_load_ubyte v48, v4, s[0:3], 0 offen offset:56
+; ALIGNED-NEXT:    buffer_load_ubyte v51, v4, s[0:3], 0 offen offset:57
+; ALIGNED-NEXT:    buffer_load_ubyte v52, v4, s[0:3], 0 offen offset:58
+; ALIGNED-NEXT:    buffer_load_ubyte v38, v4, s[0:3], 0 offen offset:60
+; ALIGNED-NEXT:    buffer_load_ubyte v50, v4, s[0:3], 0 offen offset:61
+; ALIGNED-NEXT:    buffer_load_ubyte v39, v4, s[0:3], 0 offen offset:62
+; ALIGNED-NEXT:    buffer_load_ubyte v49, v4, s[0:3], 0 offen offset:63
+; ALIGNED-NEXT:    buffer_load_ubyte v29, v4, s[0:3], 0 offen offset:64
+; ALIGNED-NEXT:    buffer_load_ubyte v55, v4, s[0:3], 0 offen offset:65
+; ALIGNED-NEXT:    buffer_load_ubyte v66, v4, s[0:3], 0 offen offset:66
+; ALIGNED-NEXT:    buffer_load_ubyte v53, v4, s[0:3], 0 offen offset:59
+; ALIGNED-NEXT:    buffer_load_ubyte v67, v4, s[0:3], 0 offen offset:67
+; ALIGNED-NEXT:    buffer_load_ubyte v54, v4, s[0:3], 0 offen offset:68
+; ALIGNED-NEXT:    buffer_load_ubyte v64, v4, s[0:3], 0 offen offset:69
+; ALIGNED-NEXT:    buffer_load_ubyte v65, v4, s[0:3], 0 offen offset:70
+; ALIGNED-NEXT:    buffer_load_ubyte v68, v4, s[0:3], 0 offen offset:71
+; ALIGNED-NEXT:    buffer_load_ubyte v69, v4, s[0:3], 0 offen offset:76
+; ALIGNED-NEXT:    buffer_load_ubyte v70, v4, s[0:3], 0 offen offset:77
+; ALIGNED-NEXT:    buffer_load_ubyte v71, v4, s[0:3], 0 offen offset:78
+; ALIGNED-NEXT:    buffer_load_ubyte v80, v4, s[0:3], 0 offen offset:79
+; ALIGNED-NEXT:    buffer_load_ubyte v126, v4, s[0:3], 0 offen offset:19
+; ALIGNED-NEXT:    buffer_load_ubyte v81, v4, s[0:3], 0 offen offset:75
+; ALIGNED-NEXT:    buffer_load_ubyte v125, v4, s[0:3], 0 offen offset:151
+; ALIGNED-NEXT:    s_waitcnt vmcnt(58)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:712 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(57)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:716 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(56)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:720 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(55)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:724 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(54)
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:732 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(53)
+; ALIGNED-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:748 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(52)
+; ALIGNED-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:756 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(51)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:740 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(50)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:744 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(49)
+; ALIGNED-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:768 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 8, v2
+; ALIGNED-NEXT:    s_waitcnt vmcnt(46)
+; ALIGNED-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:736 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(45)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:728 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(44)
+; ALIGNED-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:752 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v9, 8, v5
+; ALIGNED-NEXT:    s_waitcnt vmcnt(42)
+; ALIGNED-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:760 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v8, 8, v6
+; ALIGNED-NEXT:    v_lshl_or_b32 v5, v10, 8, v7
+; ALIGNED-NEXT:    v_lshl_or_b32 v6, v11, 8, v12
+; ALIGNED-NEXT:    v_lshl_or_b32 v7, v15, 8, v14
+; ALIGNED-NEXT:    v_lshl_or_b32 v8, v19, 8, v17
+; ALIGNED-NEXT:    s_waitcnt vmcnt(41)
+; ALIGNED-NEXT:    v_lshl_or_b32 v9, v16, 8, v13
+; ALIGNED-NEXT:    s_waitcnt vmcnt(39)
+; ALIGNED-NEXT:    v_lshl_or_b32 v10, v20, 8, v18
+; ALIGNED-NEXT:    s_waitcnt vmcnt(37)
+; ALIGNED-NEXT:    v_lshl_or_b32 v11, v23, 8, v22
+; ALIGNED-NEXT:    s_waitcnt vmcnt(35)
+; ALIGNED-NEXT:    v_lshl_or_b32 v12, v28, 8, v25
+; ALIGNED-NEXT:    s_waitcnt vmcnt(33)
+; ALIGNED-NEXT:    v_lshl_or_b32 v13, v24, 8, v21
+; ALIGNED-NEXT:    s_waitcnt vmcnt(31)
+; ALIGNED-NEXT:    v_lshl_or_b32 v14, v27, 8, v26
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v6, 16, v5
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v8, 16, v7
+; ALIGNED-NEXT:    v_lshl_or_b32 v5, v10, 16, v9
+; ALIGNED-NEXT:    v_lshl_or_b32 v6, v12, 16, v11
+; ALIGNED-NEXT:    v_lshl_or_b32 v7, v14, 16, v13
+; ALIGNED-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:772 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(29)
+; ALIGNED-NEXT:    v_lshl_or_b32 v15, v31, 8, v30
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:780 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(27)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v34, 8, v33
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:796 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(25)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v37, 8, v32
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:800 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(23)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v36, 8, v35
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:840 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(18)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v50, 8, v38
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:852 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(16)
+; ALIGNED-NEXT:    v_lshl_or_b32 v5, v49, 8, v39
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:868 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v6, v51, 8, v48
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:880 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(12)
+; ALIGNED-NEXT:    v_lshl_or_b32 v7, v53, 8, v52
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v0, 16, v15
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v5, 16, v3
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:85
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v7, 16, v6
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:916 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v55, 8, v29
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:920 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(12)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v67, 8, v66
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:928 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(10)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v64, 8, v54
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:932 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(8)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v68, 8, v65
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:86
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:82
+; ALIGNED-NEXT:    buffer_store_dword v17, off, s[0:3], s32 offset:788 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:976 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(8)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v70, 8, v69
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:83
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:74
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:988 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(8)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v80, 8, v71
+; ALIGNED-NEXT:    buffer_store_dword v19, off, s[0:3], s32 offset:792 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v16, off, s[0:3], s32 offset:764 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v18, off, s[0:3], s32 offset:776 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v20, off, s[0:3], s32 offset:784 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:73
+; ALIGNED-NEXT:    buffer_store_dword v22, off, s[0:3], s32 offset:812 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v23, off, s[0:3], s32 offset:816 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v25, off, s[0:3], s32 offset:828 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:992 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:72
+; ALIGNED-NEXT:    buffer_store_dword v28, off, s[0:3], s32 offset:832 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v21, off, s[0:3], s32 offset:804 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v24, off, s[0:3], s32 offset:808 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v26, off, s[0:3], s32 offset:820 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v27, off, s[0:3], s32 offset:824 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v30, off, s[0:3], s32 offset:836 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:844 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:860 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v34, off, s[0:3], s32 offset:856 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:848 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v37, off, s[0:3], s32 offset:876 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v35, off, s[0:3], s32 offset:864 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v36, off, s[0:3], s32 offset:872 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v38, off, s[0:3], s32 offset:884 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v50, off, s[0:3], s32 offset:900 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v39, off, s[0:3], s32 offset:888 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v49, off, s[0:3], s32 offset:896 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v48, off, s[0:3], s32 offset:892 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v51, off, s[0:3], s32 offset:904 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v52, off, s[0:3], s32 offset:912 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v53, off, s[0:3], s32 offset:908 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v29, off, s[0:3], s32 offset:924 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v55, off, s[0:3], s32 offset:936 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v66, off, s[0:3], s32 offset:956 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v67, off, s[0:3], s32 offset:948 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v54, off, s[0:3], s32 offset:940 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v64, off, s[0:3], s32 offset:944 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v65, off, s[0:3], s32 offset:952 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v68, off, s[0:3], s32 offset:960 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v69, off, s[0:3], s32 offset:964 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v70, off, s[0:3], s32 offset:968 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v71, off, s[0:3], s32 offset:972 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_store_dword v80, off, s[0:3], s32 offset:980 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(9)
+; ALIGNED-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:1416 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(8)
+; ALIGNED-NEXT:    buffer_store_dword v81, off, s[0:3], s32 offset:1000 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v4, s[0:3], 0 offen offset:87
+; ALIGNED-NEXT:    s_waitcnt vmcnt(7)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1032 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:1036 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1020 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1004 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:996 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:984 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v81, 8, v2
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:84
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:81
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1008 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:80
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:1040 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1024 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1016 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1012 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 8, v7
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:98
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v8, 8, v6
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:102
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v4, s[0:3], 0 offen offset:103
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1044 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v2
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:94
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:95
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:93
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:91
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1052 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:92
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:1116 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:1120 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1060 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1064 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1056 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1048 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v5, 8, v2
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:90
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1080 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:101
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:89
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1072 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:88
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1084 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1112 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1076 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1068 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 8, v2
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:99
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:100
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:1108 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:97
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1088 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:96
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1100 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1104 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1096 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1092 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 8, v7
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:114
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v8, 8, v6
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:118
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v4, s[0:3], 0 offen offset:119
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1124 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v2
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:110
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:111
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:109
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:107
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1132 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:108
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:1196 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:1200 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1140 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1144 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1136 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1128 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v5, 8, v2
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:106
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1160 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:117
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:105
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1152 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:104
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1164 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1192 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1156 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1148 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 8, v2
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:115
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:116
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:1188 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:113
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1168 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:112
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1180 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1184 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1176 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1172 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 8, v7
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:130
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v8, 8, v6
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:134
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v4, s[0:3], 0 offen offset:135
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1204 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v2
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:126
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:127
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:125
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:123
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1212 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:124
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:1276 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:1280 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1220 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1224 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1216 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1208 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v5, 8, v2
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:122
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1240 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:133
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:121
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1232 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:120
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1244 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1272 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1236 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1228 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 8, v2
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:131
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:132
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:1268 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:129
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1248 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:128
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1260 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1264 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1256 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1252 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 8, v7
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:146
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v8, 8, v6
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:150
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1284 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v2
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:142
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:143
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:141
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:139
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1292 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:140
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:1372 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1300 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1304 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1296 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1288 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v5, 8, v2
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:138
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1320 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:149
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:137
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1312 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:136
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1324 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:1368 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1316 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1308 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 8, v2
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_ubyte v3, v4, s[0:3], 0 offen offset:147
+; ALIGNED-NEXT:    buffer_load_ubyte v2, v4, s[0:3], 0 offen offset:148
+; ALIGNED-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:1356 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:145
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1328 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen offset:144
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:1348 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:1352 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:1340 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1332 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 8, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v3, 8, v7
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v125, 8, v6
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1384 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v5, 8, v2
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1392 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v122, v4, s[0:3], 0 offen offset:156
+; ALIGNED-NEXT:    buffer_load_ubyte v111, v4, s[0:3], 0 offen offset:157
+; ALIGNED-NEXT:    buffer_load_ubyte v120, v4, s[0:3], 0 offen offset:158
+; ALIGNED-NEXT:    buffer_load_ubyte v109, v4, s[0:3], 0 offen offset:159
+; ALIGNED-NEXT:    buffer_load_ubyte v106, v4, s[0:3], 0 offen offset:155
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v111, 8, v122
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v109, 8, v120
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1400 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v105, v4, s[0:3], 0 offen offset:152
+; ALIGNED-NEXT:    buffer_load_ubyte v94, v4, s[0:3], 0 offen offset:153
+; ALIGNED-NEXT:    buffer_load_ubyte v92, v4, s[0:3], 0 offen offset:154
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v94, 8, v105
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v106, 8, v92
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1408 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v89, v4, s[0:3], 0 offen offset:160
+; ALIGNED-NEXT:    buffer_load_ubyte v79, v4, s[0:3], 0 offen offset:161
+; ALIGNED-NEXT:    buffer_load_ubyte v73, v4, s[0:3], 0 offen offset:162
+; ALIGNED-NEXT:    buffer_load_ubyte v74, v4, s[0:3], 0 offen offset:163
+; ALIGNED-NEXT:    buffer_load_ubyte v88, v4, s[0:3], 0 offen offset:164
+; ALIGNED-NEXT:    buffer_load_ubyte v75, v4, s[0:3], 0 offen offset:165
+; ALIGNED-NEXT:    buffer_load_ubyte v77, v4, s[0:3], 0 offen offset:166
+; ALIGNED-NEXT:    buffer_load_ubyte v72, v4, s[0:3], 0 offen offset:167
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v79, 8, v89
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v74, 8, v73
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v72, 8, v77
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1420 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v75, 8, v88
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1424 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v63, v4, s[0:3], 0 offen offset:172
+; ALIGNED-NEXT:    buffer_load_ubyte v61, v4, s[0:3], 0 offen offset:173
+; ALIGNED-NEXT:    buffer_load_ubyte v62, v4, s[0:3], 0 offen offset:174
+; ALIGNED-NEXT:    buffer_load_ubyte v60, v4, s[0:3], 0 offen offset:175
+; ALIGNED-NEXT:    buffer_load_ubyte v58, v4, s[0:3], 0 offen offset:171
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v61, 8, v63
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v60, 8, v62
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1428 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v59, v4, s[0:3], 0 offen offset:168
+; ALIGNED-NEXT:    buffer_load_ubyte v56, v4, s[0:3], 0 offen offset:169
+; ALIGNED-NEXT:    buffer_load_ubyte v47, v4, s[0:3], 0 offen offset:170
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v56, 8, v59
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v58, 8, v47
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1432 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v46, v4, s[0:3], 0 offen offset:176
+; ALIGNED-NEXT:    buffer_load_ubyte v44, v4, s[0:3], 0 offen offset:177
+; ALIGNED-NEXT:    buffer_load_ubyte v119, v4, s[0:3], 0 offen offset:178
+; ALIGNED-NEXT:    buffer_load_ubyte v40, v4, s[0:3], 0 offen offset:179
+; ALIGNED-NEXT:    buffer_load_ubyte v45, v4, s[0:3], 0 offen offset:180
+; ALIGNED-NEXT:    buffer_load_ubyte v41, v4, s[0:3], 0 offen offset:181
+; ALIGNED-NEXT:    buffer_load_ubyte v42, v4, s[0:3], 0 offen offset:182
+; ALIGNED-NEXT:    buffer_load_ubyte v118, v4, s[0:3], 0 offen offset:183
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v44, 8, v46
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v40, 8, v119
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v118, 8, v42
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1436 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v41, 8, v45
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1440 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v117, v4, s[0:3], 0 offen offset:188
+; ALIGNED-NEXT:    buffer_load_ubyte v115, v4, s[0:3], 0 offen offset:189
+; ALIGNED-NEXT:    buffer_load_ubyte v116, v4, s[0:3], 0 offen offset:190
+; ALIGNED-NEXT:    buffer_load_ubyte v114, v4, s[0:3], 0 offen offset:191
+; ALIGNED-NEXT:    buffer_load_ubyte v112, v4, s[0:3], 0 offen offset:187
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v115, 8, v117
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v114, 8, v116
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1444 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v113, v4, s[0:3], 0 offen offset:184
+; ALIGNED-NEXT:    buffer_load_ubyte v103, v4, s[0:3], 0 offen offset:185
+; ALIGNED-NEXT:    buffer_load_ubyte v102, v4, s[0:3], 0 offen offset:186
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v103, 8, v113
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v1, v112, 8, v102
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1448 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v100, v4, s[0:3], 0 offen offset:192
+; ALIGNED-NEXT:    buffer_load_ubyte v98, v4, s[0:3], 0 offen offset:193
+; ALIGNED-NEXT:    buffer_load_ubyte v87, v4, s[0:3], 0 offen offset:194
+; ALIGNED-NEXT:    buffer_load_ubyte v86, v4, s[0:3], 0 offen offset:195
+; ALIGNED-NEXT:    buffer_load_ubyte v99, v4, s[0:3], 0 offen offset:196
+; ALIGNED-NEXT:    buffer_load_ubyte v97, v4, s[0:3], 0 offen offset:197
+; ALIGNED-NEXT:    buffer_load_ubyte v96, v4, s[0:3], 0 offen offset:198
+; ALIGNED-NEXT:    buffer_load_ubyte v85, v4, s[0:3], 0 offen offset:199
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v98, 8, v100
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v86, 8, v87
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v2
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v97, 8, v99
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v85, 8, v96
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1452 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v2
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1456 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v83, v4, s[0:3], 0 offen offset:204
+; ALIGNED-NEXT:    buffer_load_ubyte v81, v4, s[0:3], 0 offen offset:205
+; ALIGNED-NEXT:    buffer_load_ubyte v82, v4, s[0:3], 0 offen offset:206
+; ALIGNED-NEXT:    buffer_load_ubyte v80, v4, s[0:3], 0 offen offset:207
+; ALIGNED-NEXT:    buffer_load_ubyte v71, v4, s[0:3], 0 offen offset:203
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v81, 8, v83
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v80, 8, v82
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v2
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1460 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v70, v4, s[0:3], 0 offen offset:200
+; ALIGNED-NEXT:    buffer_load_ubyte v69, v4, s[0:3], 0 offen offset:201
+; ALIGNED-NEXT:    buffer_load_ubyte v68, v4, s[0:3], 0 offen offset:202
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v69, 8, v70
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v71, 8, v68
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v2
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1464 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v67, v4, s[0:3], 0 offen offset:212
+; ALIGNED-NEXT:    buffer_load_ubyte v54, v4, s[0:3], 0 offen offset:213
+; ALIGNED-NEXT:    buffer_load_ubyte v65, v4, s[0:3], 0 offen offset:214
+; ALIGNED-NEXT:    buffer_load_ubyte v52, v4, s[0:3], 0 offen offset:215
+; ALIGNED-NEXT:    buffer_load_ubyte v55, v4, s[0:3], 0 offen offset:211
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v54, 8, v67
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v52, 8, v65
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v2
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1468 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v66, v4, s[0:3], 0 offen offset:216
+; ALIGNED-NEXT:    buffer_load_ubyte v53, v4, s[0:3], 0 offen offset:217
+; ALIGNED-NEXT:    buffer_load_ubyte v49, v4, s[0:3], 0 offen offset:218
+; ALIGNED-NEXT:    buffer_load_ubyte v48, v4, s[0:3], 0 offen offset:219
+; ALIGNED-NEXT:    buffer_load_ubyte v64, v4, s[0:3], 0 offen offset:220
+; ALIGNED-NEXT:    buffer_load_ubyte v51, v4, s[0:3], 0 offen offset:221
+; ALIGNED-NEXT:    buffer_load_ubyte v50, v4, s[0:3], 0 offen offset:222
+; ALIGNED-NEXT:    buffer_load_ubyte v39, v4, s[0:3], 0 offen offset:223
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v53, 8, v66
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v48, 8, v49
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v2
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v51, 8, v64
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v39, 8, v50
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1472 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v2
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1476 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v38, v4, s[0:3], 0 offen offset:208
+; ALIGNED-NEXT:    buffer_load_ubyte v36, v4, s[0:3], 0 offen offset:209
+; ALIGNED-NEXT:    buffer_load_ubyte v37, v4, s[0:3], 0 offen offset:210
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v36, 8, v38
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v55, 8, v37
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v2
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1480 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x7
+; ALIGNED-NEXT:    buffer_load_ubyte v35, v4, s[0:3], 0 offen offset:224
+; ALIGNED-NEXT:    buffer_load_ubyte v33, v4, s[0:3], 0 offen offset:225
+; ALIGNED-NEXT:    buffer_load_ubyte v29, v4, s[0:3], 0 offen offset:226
+; ALIGNED-NEXT:    buffer_load_ubyte v30, v4, s[0:3], 0 offen offset:227
+; ALIGNED-NEXT:    buffer_load_ubyte v34, v4, s[0:3], 0 offen offset:228
+; ALIGNED-NEXT:    buffer_load_ubyte v31, v4, s[0:3], 0 offen offset:229
+; ALIGNED-NEXT:    buffer_load_ubyte v32, v4, s[0:3], 0 offen offset:230
+; ALIGNED-NEXT:    buffer_load_ubyte v28, v4, s[0:3], 0 offen offset:231
+; ALIGNED-NEXT:    s_waitcnt vmcnt(6)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v33, 8, v35
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v30, 8, v29
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v3, 16, v2
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v31, 8, v34
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v28, 8, v32
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1484 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x17
+; ALIGNED-NEXT:    buffer_load_ubyte v27, v4, s[0:3], 0 offen offset:236
+; ALIGNED-NEXT:    buffer_load_ubyte v25, v4, s[0:3], 0 offen offset:237
+; ALIGNED-NEXT:    buffer_load_ubyte v26, v4, s[0:3], 0 offen offset:238
+; ALIGNED-NEXT:    buffer_load_ubyte v24, v4, s[0:3], 0 offen offset:239
+; ALIGNED-NEXT:    buffer_load_ubyte v23, v4, s[0:3], 0 offen offset:235
+; ALIGNED-NEXT:    buffer_load_ubyte v22, v4, s[0:3], 0 offen offset:232
+; ALIGNED-NEXT:    buffer_load_ubyte v21, v4, s[0:3], 0 offen offset:233
+; ALIGNED-NEXT:    buffer_load_ubyte v20, v4, s[0:3], 0 offen offset:234
+; ALIGNED-NEXT:    buffer_load_ubyte v19, v4, s[0:3], 0 offen offset:240
+; ALIGNED-NEXT:    buffer_load_ubyte v17, v4, s[0:3], 0 offen offset:241
+; ALIGNED-NEXT:    buffer_load_ubyte v13, v4, s[0:3], 0 offen offset:242
+; ALIGNED-NEXT:    buffer_load_ubyte v14, v4, s[0:3], 0 offen offset:243
+; ALIGNED-NEXT:    buffer_load_ubyte v18, v4, s[0:3], 0 offen offset:244
+; ALIGNED-NEXT:    buffer_load_ubyte v15, v4, s[0:3], 0 offen offset:245
+; ALIGNED-NEXT:    buffer_load_ubyte v16, v4, s[0:3], 0 offen offset:246
+; ALIGNED-NEXT:    buffer_load_ubyte v12, v4, s[0:3], 0 offen offset:247
+; ALIGNED-NEXT:    buffer_load_ubyte v11, v4, s[0:3], 0 offen offset:252
+; ALIGNED-NEXT:    buffer_load_ubyte v9, v4, s[0:3], 0 offen offset:253
+; ALIGNED-NEXT:    buffer_load_ubyte v10, v4, s[0:3], 0 offen offset:254
+; ALIGNED-NEXT:    buffer_load_ubyte v8, v4, s[0:3], 0 offen offset:255
+; ALIGNED-NEXT:    buffer_load_ubyte v7, v4, s[0:3], 0 offen offset:251
+; ALIGNED-NEXT:    buffer_load_ubyte v6, v4, s[0:3], 0 offen offset:248
+; ALIGNED-NEXT:    buffer_load_ubyte v5, v4, s[0:3], 0 offen offset:249
+; ALIGNED-NEXT:    buffer_load_ubyte v1, v4, s[0:3], 0 offen offset:250
+; ALIGNED-NEXT:    v_lshl_or_b32 v123, v3, 16, v2
+; ALIGNED-NEXT:    buffer_load_ubyte v0, v4, s[0:3], 0 offen
+; ALIGNED-NEXT:    s_waitcnt vmcnt(23)
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v25, 8, v27
+; ALIGNED-NEXT:    s_waitcnt vmcnt(21)
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v24, 8, v26
+; ALIGNED-NEXT:    s_waitcnt vmcnt(9)
+; ALIGNED-NEXT:    v_lshl_or_b32 v43, v12, 8, v16
+; ALIGNED-NEXT:    s_waitcnt vmcnt(5)
+; ALIGNED-NEXT:    v_lshl_or_b32 v57, v8, 8, v10
+; ALIGNED-NEXT:    v_lshl_or_b32 v104, v3, 16, v2
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v21, 8, v22
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v23, 8, v20
+; ALIGNED-NEXT:    v_lshl_or_b32 v76, v3, 16, v2
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v17, 8, v19
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v14, 8, v13
+; ALIGNED-NEXT:    v_lshl_or_b32 v101, v3, 16, v2
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v15, 8, v18
+; ALIGNED-NEXT:    v_lshl_or_b32 v84, v43, 16, v3
+; ALIGNED-NEXT:    v_lshl_or_b32 v43, v9, 8, v11
+; ALIGNED-NEXT:    v_lshl_or_b32 v3, v57, 16, v43
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshl_or_b32 v43, v5, 8, v6
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v57, v7, 8, v1
+; ALIGNED-NEXT:    v_lshl_or_b32 v2, v57, 16, v43
+; ALIGNED-NEXT:    buffer_load_ubyte v43, v4, s[0:3], 0 offen offset:1
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1336 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v43, off, s[0:3], s32 offset:1344 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x5
+; ALIGNED-NEXT:    buffer_load_ubyte v127, v4, s[0:3], 0 offen offset:2
+; ALIGNED-NEXT:    buffer_load_ubyte v57, v4, s[0:3], 0 offen offset:3
+; ALIGNED-NEXT:    buffer_load_ubyte v78, v4, s[0:3], 0 offen offset:4
+; ALIGNED-NEXT:    buffer_load_ubyte v90, v4, s[0:3], 0 offen offset:5
+; ALIGNED-NEXT:    buffer_load_ubyte v91, v4, s[0:3], 0 offen offset:6
+; ALIGNED-NEXT:    buffer_load_ubyte v124, v4, s[0:3], 0 offen offset:7
+; ALIGNED-NEXT:    v_lshl_or_b32 v43, v43, 8, v0
+; ALIGNED-NEXT:    s_waitcnt vmcnt(4)
+; ALIGNED-NEXT:    buffer_store_dword v57, off, s[0:3], s32 offset:1360 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v57, v57, 8, v127
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    buffer_store_dword v78, off, s[0:3], s32 offset:1364 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    buffer_store_dword v90, off, s[0:3], s32 offset:1376 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    buffer_store_dword v91, off, s[0:3], s32 offset:1380 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v57, 16, v43
+; ALIGNED-NEXT:    v_lshl_or_b32 v43, v90, 8, v78
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v57, v124, 8, v91
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1388 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v57, 16, v43
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1396 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x4
+; ALIGNED-NEXT:    buffer_load_ubyte v121, v4, s[0:3], 0 offen offset:12
+; ALIGNED-NEXT:    buffer_load_ubyte v107, v4, s[0:3], 0 offen offset:13
+; ALIGNED-NEXT:    buffer_load_ubyte v110, v4, s[0:3], 0 offen offset:14
+; ALIGNED-NEXT:    buffer_load_ubyte v108, v4, s[0:3], 0 offen offset:15
+; ALIGNED-NEXT:    buffer_load_ubyte v93, v4, s[0:3], 0 offen offset:11
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v43, v107, 8, v121
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v57, v108, 8, v110
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v57, 16, v43
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1404 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v95, v4, s[0:3], 0 offen offset:8
+; ALIGNED-NEXT:    buffer_load_ubyte v91, v4, s[0:3], 0 offen offset:9
+; ALIGNED-NEXT:    buffer_load_ubyte v90, v4, s[0:3], 0 offen offset:10
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_lshl_or_b32 v43, v91, 8, v95
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_lshl_or_b32 v57, v93, 8, v90
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v57, 16, v43
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:1412 ; 4-byte Folded Spill
+; ALIGNED-NEXT:    s_clause 0x2
+; ALIGNED-NEXT:    buffer_load_ubyte v78, v4, s[0:3], 0 offen offset:16
+; ALIGNED-NEXT:    buffer_load_ubyte v43, v4, s[0:3], 0 offen offset:18
+; ALIGNED-NEXT:    buffer_load_ubyte v57, v4, s[0:3], 0 offen offset:17
+; ALIGNED-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:488
+; ALIGNED-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:492
+; ALIGNED-NEXT:    buffer_store_dword v84, off, s[0:3], s32 offset:484
+; ALIGNED-NEXT:    buffer_store_dword v101, off, s[0:3], s32 offset:480
+; ALIGNED-NEXT:    s_clause 0x1
+; ALIGNED-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:704
+; ALIGNED-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:708
+; ALIGNED-NEXT:    v_add_nc_u32_e32 v4, 0xffffff00, v4
+; ALIGNED-NEXT:    s_waitcnt vmcnt(3)
+; ALIGNED-NEXT:    v_lshl_or_b32 v0, v126, 8, v43
+; ALIGNED-NEXT:    s_waitcnt vmcnt(2)
+; ALIGNED-NEXT:    v_lshl_or_b32 v126, v57, 8, v78
+; ALIGNED-NEXT:    s_waitcnt vmcnt(1)
+; ALIGNED-NEXT:    v_add_co_u32 v2, vcc_lo, v2, s4
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, s5, v3, vcc_lo
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v1 offset:250
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v7 offset:251
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v5 offset:249
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v8 offset:255
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v9 offset:253
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v10 offset:254
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v11 offset:252
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v6 offset:248
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v13 offset:242
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v14 offset:243
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v17 offset:241
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v12 offset:247
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v15 offset:245
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v16 offset:246
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v18 offset:244
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v19 offset:240
+; ALIGNED-NEXT:    buffer_store_dword v76, off, s[0:3], s32 offset:504
+; ALIGNED-NEXT:    buffer_store_dword v104, off, s[0:3], s32 offset:508
+; ALIGNED-NEXT:    buffer_store_dword v123, off, s[0:3], s32 offset:500
+; ALIGNED-NEXT:    v_lshl_or_b32 v126, v0, 16, v126
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1484 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_add_u32 s4, s4, 0xffffff00
+; ALIGNED-NEXT:    s_addc_u32 s5, s5, -1
+; ALIGNED-NEXT:    s_cmp_eq_u64 s[4:5], s[6:7]
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:496
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v20 offset:234
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v23 offset:235
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v21 offset:233
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v24 offset:239
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v25 offset:237
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v26 offset:238
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v27 offset:236
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v22 offset:232
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v29 offset:226
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v30 offset:227
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v33 offset:225
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v28 offset:231
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v31 offset:229
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v32 offset:230
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v34 offset:228
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v35 offset:224
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1480 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:448
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1476 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:460
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1472 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:456
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1468 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:452
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v54 offset:213
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v52 offset:215
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v36 offset:209
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v55 offset:211
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v37 offset:210
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v65 offset:214
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v67 offset:212
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v49 offset:218
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v48 offset:219
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v53 offset:217
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v39 offset:223
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v51 offset:221
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v50 offset:222
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v64 offset:220
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v66 offset:216
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v38 offset:208
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1464 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:472
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1460 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:476
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1456 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:468
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1452 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:464
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v68 offset:202
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v71 offset:203
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v69 offset:201
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v80 offset:207
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v81 offset:205
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v82 offset:206
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v83 offset:204
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v70 offset:200
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v87 offset:194
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v86 offset:195
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v98 offset:193
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v85 offset:199
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v97 offset:197
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v96 offset:198
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v99 offset:196
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v100 offset:192
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1448 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:552
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1444 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:556
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1440 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:548
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1436 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:544
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v102 offset:186
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v112 offset:187
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v103 offset:185
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v114 offset:191
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v115 offset:189
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v116 offset:190
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v117 offset:188
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v113 offset:184
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v119 offset:178
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v40 offset:179
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v44 offset:177
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v118 offset:183
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v41 offset:181
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v42 offset:182
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v45 offset:180
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v46 offset:176
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1432 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:568
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1428 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:572
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1424 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:564
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1420 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:560
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v47 offset:170
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v58 offset:171
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v56 offset:169
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v60 offset:175
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v61 offset:173
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v62 offset:174
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v63 offset:172
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v59 offset:168
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v73 offset:162
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v74 offset:163
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v79 offset:161
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v72 offset:167
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v75 offset:165
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v77 offset:166
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v88 offset:164
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v89 offset:160
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1408 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:520
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1400 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:524
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1392 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:516
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1384 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:512
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v92 offset:154
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v106 offset:155
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v94 offset:153
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v109 offset:159
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v111 offset:157
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v120 offset:158
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v122 offset:156
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v105 offset:152
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1356 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:146
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1348 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:147
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1340 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:145
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v125 offset:151
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1368 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:149
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1372 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:150
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1352 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:148
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1332 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:144
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1328 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:536
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1312 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:540
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1292 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:532
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1284 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:528
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1324 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:138
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1320 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:139
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1316 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:137
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1304 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:143
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1296 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:141
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1300 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:142
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1288 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:140
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1308 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:136
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1268 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:130
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1260 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:131
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1256 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:129
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1280 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:135
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1272 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:133
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1276 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:134
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1264 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:132
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1252 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:128
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1248 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:616
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1232 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:620
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1212 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:612
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1204 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:608
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1244 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:122
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1240 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:123
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1236 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:121
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1224 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:127
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1216 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:125
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1220 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:126
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1208 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:124
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1228 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:120
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1188 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:114
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1180 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:115
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1176 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:113
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1200 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:119
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1192 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:117
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1196 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:118
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1184 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:116
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1172 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:112
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1168 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:632
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1152 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:636
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1132 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:628
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1124 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:624
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1164 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:106
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1160 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:107
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1156 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:105
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1144 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:111
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1136 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:109
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1140 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:110
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1128 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:108
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1148 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:104
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1108 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:98
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1100 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:99
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1096 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:97
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1120 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:103
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1112 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:101
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1116 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:102
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1104 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:100
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1092 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:96
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1088 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:584
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1072 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:588
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1052 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:580
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1044 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:576
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1084 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:90
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1080 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:91
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1076 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:89
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1064 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:95
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1056 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:93
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1060 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:94
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1048 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:92
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1068 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:88
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:82
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1020 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:83
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1016 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:81
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1040 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:87
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1032 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:85
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1036 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:86
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1024 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:84
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1012 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:80
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1008 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:600
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:992 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:604
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:988 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:596
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:976 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:592
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1004 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:74
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1000 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:75
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:996 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:73
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:980 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:79
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:968 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:77
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:972 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:78
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:964 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:76
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:984 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:72
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:956 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:66
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:948 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:67
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:936 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:65
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:960 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:71
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:944 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:69
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:952 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:70
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:940 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:68
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:924 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:64
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:932 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:680
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:928 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:684
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:920 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:676
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:916 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:672
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:900 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:61
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:912 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:58
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:908 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:59
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:904 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:57
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:896 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:63
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:888 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:62
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:884 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:60
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:892 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:56
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:876 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:53
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:860 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:50
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:856 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:51
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:844 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:49
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:872 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:55
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:864 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:54
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:848 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:52
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:836 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:48
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:880 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:700
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:868 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:696
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:852 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:692
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:840 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:688
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:832 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:43
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:828 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:42
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:816 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:41
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:812 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:40
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:824 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:47
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:820 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:46
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:808 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:45
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:804 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:44
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:792 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:35
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:788 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:34
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:772 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:33
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:768 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:32
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:784 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:39
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:776 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:38
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:764 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:37
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:760 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:36
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:800 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:648
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:796 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:652
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:780 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:644
+; ALIGNED-NEXT:    buffer_store_dword v126, off, s[0:3], s32 offset:640
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:756 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:26
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:752 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:27
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:748 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:25
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:744 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:31
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:736 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:29
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:740 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:30
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:728 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:28
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:732 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:24
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v43 offset:18
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1416 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:19
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v57 offset:17
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:724 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:23
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:716 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:21
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:720 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:22
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:712 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:20
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v78 offset:16
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1412 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:664
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1404 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:668
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1396 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:660
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1388 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:656
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v90 offset:10
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v93 offset:11
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v107 offset:13
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v91 offset:9
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v108 offset:15
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v110 offset:14
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v121 offset:12
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v95 offset:8
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v127 offset:2
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1360 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:3
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1344 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:1
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v124 offset:7
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1376 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:5
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1380 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:6
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1364 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0 offset:4
+; ALIGNED-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:1336 ; 4-byte Folded Reload
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0)
+; ALIGNED-NEXT:    flat_store_byte v[2:3], v0
+; ALIGNED-NEXT:    s_cbranch_scc0 .LBB9_4
+; ALIGNED-NEXT:  .LBB9_5: ; %Flow11
+; ALIGNED-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; ALIGNED-NEXT:    s_clause 0x2f
+; ALIGNED-NEXT:    buffer_load_dword v127, off, s[0:3], s32
+; ALIGNED-NEXT:    buffer_load_dword v126, off, s[0:3], s32 offset:4
+; ALIGNED-NEXT:    buffer_load_dword v125, off, s[0:3], s32 offset:8
+; ALIGNED-NEXT:    buffer_load_dword v124, off, s[0:3], s32 offset:12
+; ALIGNED-NEXT:    buffer_load_dword v123, off, s[0:3], s32 offset:16
+; ALIGNED-NEXT:    buffer_load_dword v122, off, s[0:3], s32 offset:20
+; ALIGNED-NEXT:    buffer_load_dword v121, off, s[0:3], s32 offset:24
+; ALIGNED-NEXT:    buffer_load_dword v120, off, s[0:3], s32 offset:28
+; ALIGNED-NEXT:    buffer_load_dword v111, off, s[0:3], s32 offset:32
+; ALIGNED-NEXT:    buffer_load_dword v110, off, s[0:3], s32 offset:36
+; ALIGNED-NEXT:    buffer_load_dword v109, off, s[0:3], s32 offset:40
+; ALIGNED-NEXT:    buffer_load_dword v108, off, s[0:3], s32 offset:44
+; ALIGNED-NEXT:    buffer_load_dword v107, off, s[0:3], s32 offset:48
+; ALIGNED-NEXT:    buffer_load_dword v106, off, s[0:3], s32 offset:52
+; ALIGNED-NEXT:    buffer_load_dword v105, off, s[0:3], s32 offset:56
+; ALIGNED-NEXT:    buffer_load_dword v104, off, s[0:3], s32 offset:60
+; ALIGNED-NEXT:    buffer_load_dword v95, off, s[0:3], s32 offset:64
+; ALIGNED-NEXT:    buffer_load_dword v94, off, s[0:3], s32 offset:68
+; ALIGNED-NEXT:    buffer_load_dword v93, off, s[0:3], s32 offset:72
+; ALIGNED-NEXT:    buffer_load_dword v92, off, s[0:3], s32 offset:76
+; ALIGNED-NEXT:    buffer_load_dword v91, off, s[0:3], s32 offset:80
+; ALIGNED-NEXT:    buffer_load_dword v90, off, s[0:3], s32 offset:84
+; ALIGNED-NEXT:    buffer_load_dword v89, off, s[0:3], s32 offset:88
+; ALIGNED-NEXT:    buffer_load_dword v88, off, s[0:3], s32 offset:92
+; ALIGNED-NEXT:    buffer_load_dword v79, off, s[0:3], s32 offset:96
+; ALIGNED-NEXT:    buffer_load_dword v78, off, s[0:3], s32 offset:100
+; ALIGNED-NEXT:    buffer_load_dword v77, off, s[0:3], s32 offset:104
+; ALIGNED-NEXT:    buffer_load_dword v76, off, s[0:3], s32 offset:108
+; ALIGNED-NEXT:    buffer_load_dword v75, off, s[0:3], s32 offset:112
+; ALIGNED-NEXT:    buffer_load_dword v74, off, s[0:3], s32 offset:116
+; ALIGNED-NEXT:    buffer_load_dword v73, off, s[0:3], s32 offset:120
+; ALIGNED-NEXT:    buffer_load_dword v72, off, s[0:3], s32 offset:124
+; ALIGNED-NEXT:    buffer_load_dword v63, off, s[0:3], s32 offset:128
+; ALIGNED-NEXT:    buffer_load_dword v62, off, s[0:3], s32 offset:132
+; ALIGNED-NEXT:    buffer_load_dword v61, off, s[0:3], s32 offset:136
+; ALIGNED-NEXT:    buffer_load_dword v60, off, s[0:3], s32 offset:140
+; ALIGNED-NEXT:    buffer_load_dword v59, off, s[0:3], s32 offset:144
+; ALIGNED-NEXT:    buffer_load_dword v58, off, s[0:3], s32 offset:148
+; ALIGNED-NEXT:    buffer_load_dword v57, off, s[0:3], s32 offset:152
+; ALIGNED-NEXT:    buffer_load_dword v56, off, s[0:3], s32 offset:156
+; ALIGNED-NEXT:    buffer_load_dword v47, off, s[0:3], s32 offset:160
+; ALIGNED-NEXT:    buffer_load_dword v46, off, s[0:3], s32 offset:164
+; ALIGNED-NEXT:    buffer_load_dword v45, off, s[0:3], s32 offset:168
+; ALIGNED-NEXT:    buffer_load_dword v44, off, s[0:3], s32 offset:172
+; ALIGNED-NEXT:    buffer_load_dword v43, off, s[0:3], s32 offset:176
+; ALIGNED-NEXT:    buffer_load_dword v42, off, s[0:3], s32 offset:180
+; ALIGNED-NEXT:    buffer_load_dword v41, off, s[0:3], s32 offset:184
+; ALIGNED-NEXT:    buffer_load_dword v40, off, s[0:3], s32 offset:188
+; ALIGNED-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; ALIGNED-NEXT:    s_setpc_b64 s[30:31]
+;
+; UNROLL3-LABEL: memmove_p0_p5_sz2048:
+; UNROLL3:       ; %bb.0: ; %entry
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; UNROLL3-NEXT:    v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1]
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0
+; UNROLL3-NEXT:    s_mov_b32 s6, exec_lo
+; UNROLL3-NEXT:    v_cndmask_b32_e32 v3, -1, v0, vcc_lo
+; UNROLL3-NEXT:    v_cmpx_ge_u32_e64 v2, v3
+; UNROLL3-NEXT:    s_xor_b32 s6, exec_lo, s6
+; UNROLL3-NEXT:    s_cbranch_execz .LBB9_4
+; UNROLL3-NEXT:  ; %bb.1: ; %memmove_fwd_loop.preheader
+; UNROLL3-NEXT:    v_mov_b32_e32 v3, v2
+; UNROLL3-NEXT:    s_inst_prefetch 0x1
+; UNROLL3-NEXT:    .p2align 6
+; UNROLL3-NEXT:  .LBB9_2: ; %memmove_fwd_loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    s_clause 0xb
+; UNROLL3-NEXT:    buffer_load_dword v4, v3, s[0:3], 0 offen
+; UNROLL3-NEXT:    buffer_load_dword v5, v3, s[0:3], 0 offen offset:4
+; UNROLL3-NEXT:    buffer_load_dword v6, v3, s[0:3], 0 offen offset:8
+; UNROLL3-NEXT:    buffer_load_dword v7, v3, s[0:3], 0 offen offset:12
+; UNROLL3-NEXT:    buffer_load_dword v8, v3, s[0:3], 0 offen offset:16
+; UNROLL3-NEXT:    buffer_load_dword v9, v3, s[0:3], 0 offen offset:20
+; UNROLL3-NEXT:    buffer_load_dword v10, v3, s[0:3], 0 offen offset:24
+; UNROLL3-NEXT:    buffer_load_dword v11, v3, s[0:3], 0 offen offset:28
+; UNROLL3-NEXT:    buffer_load_dword v12, v3, s[0:3], 0 offen offset:32
+; UNROLL3-NEXT:    buffer_load_dword v13, v3, s[0:3], 0 offen offset:36
+; UNROLL3-NEXT:    buffer_load_dword v14, v3, s[0:3], 0 offen offset:40
+; UNROLL3-NEXT:    buffer_load_dword v15, v3, s[0:3], 0 offen offset:44
+; UNROLL3-NEXT:    v_add_co_u32 v16, vcc_lo, v0, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v17, vcc_lo, s5, v1, vcc_lo
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 48
+; UNROLL3-NEXT:    v_add_nc_u32_e32 v3, 48, v3
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, 0
+; UNROLL3-NEXT:    s_waitcnt vmcnt(4)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[8:11] offset:16
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[4:7]
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[16:17], v[12:15] offset:32
+; UNROLL3-NEXT:    s_cmp_lg_u64 s[4:5], 0x7e0
+; UNROLL3-NEXT:    s_cbranch_scc1 .LBB9_2
+; UNROLL3-NEXT:  ; %bb.3: ; %memmove_fwd_residual
+; UNROLL3-NEXT:    s_inst_prefetch 0x2
+; UNROLL3-NEXT:    s_clause 0x3
+; UNROLL3-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:2016
+; UNROLL3-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:2020
+; UNROLL3-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:2024
+; UNROLL3-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:2028
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:2016
+; UNROLL3-NEXT:    s_clause 0x3
+; UNROLL3-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:2032
+; UNROLL3-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:2036
+; UNROLL3-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:2040
+; UNROLL3-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:2044
+; UNROLL3-NEXT:    ; implicit-def: $vgpr2
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:2032
+; UNROLL3-NEXT:    ; implicit-def: $vgpr0_vgpr1
+; UNROLL3-NEXT:  .LBB9_4: ; %Flow8
+; UNROLL3-NEXT:    s_andn2_saveexec_b32 s8, s6
+; UNROLL3-NEXT:    s_cbranch_execz .LBB9_7
+; UNROLL3-NEXT:  ; %bb.5: ; %memmove_bwd_residual
+; UNROLL3-NEXT:    s_clause 0x3
+; UNROLL3-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:2032
+; UNROLL3-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:2036
+; UNROLL3-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:2040
+; UNROLL3-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:2044
+; UNROLL3-NEXT:    s_movk_i32 s6, 0xffd0
+; UNROLL3-NEXT:    s_mov_b64 s[4:5], 0x7b0
+; UNROLL3-NEXT:    s_mov_b32 s7, -1
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:2032
+; UNROLL3-NEXT:    s_clause 0x3
+; UNROLL3-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen offset:2016
+; UNROLL3-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:2020
+; UNROLL3-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:2024
+; UNROLL3-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:2028
+; UNROLL3-NEXT:    v_add_nc_u32_e32 v2, 0x7b0, v2
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[0:1], v[3:6] offset:2016
+; UNROLL3-NEXT:    s_inst_prefetch 0x1
+; UNROLL3-NEXT:    .p2align 6
+; UNROLL3-NEXT:  .LBB9_6: ; %memmove_bwd_loop
+; UNROLL3-NEXT:    ; =>This Inner Loop Header: Depth=1
+; UNROLL3-NEXT:    s_clause 0xb
+; UNROLL3-NEXT:    buffer_load_dword v3, v2, s[0:3], 0 offen
+; UNROLL3-NEXT:    buffer_load_dword v4, v2, s[0:3], 0 offen offset:4
+; UNROLL3-NEXT:    buffer_load_dword v5, v2, s[0:3], 0 offen offset:8
+; UNROLL3-NEXT:    buffer_load_dword v6, v2, s[0:3], 0 offen offset:12
+; UNROLL3-NEXT:    buffer_load_dword v7, v2, s[0:3], 0 offen offset:16
+; UNROLL3-NEXT:    buffer_load_dword v8, v2, s[0:3], 0 offen offset:20
+; UNROLL3-NEXT:    buffer_load_dword v9, v2, s[0:3], 0 offen offset:24
+; UNROLL3-NEXT:    buffer_load_dword v10, v2, s[0:3], 0 offen offset:28
+; UNROLL3-NEXT:    buffer_load_dword v11, v2, s[0:3], 0 offen offset:32
+; UNROLL3-NEXT:    buffer_load_dword v12, v2, s[0:3], 0 offen offset:36
+; UNROLL3-NEXT:    buffer_load_dword v13, v2, s[0:3], 0 offen offset:40
+; UNROLL3-NEXT:    buffer_load_dword v14, v2, s[0:3], 0 offen offset:44
+; UNROLL3-NEXT:    v_add_co_u32 v15, vcc_lo, v0, s4
+; UNROLL3-NEXT:    v_add_co_ci_u32_e32 v16, vcc_lo, s5, v1, vcc_lo
+; UNROLL3-NEXT:    v_subrev_nc_u32_e32 v2, 48, v2
+; UNROLL3-NEXT:    s_add_u32 s4, s4, 0xffffffd0
+; UNROLL3-NEXT:    s_addc_u32 s5, s5, -1
+; UNROLL3-NEXT:    s_waitcnt vmcnt(4)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[15:16], v[7:10] offset:16
+; UNROLL3-NEXT:    flat_store_dwordx4 v[15:16], v[3:6]
+; UNROLL3-NEXT:    s_waitcnt vmcnt(0)
+; UNROLL3-NEXT:    flat_store_dwordx4 v[15:16], v[11:14] offset:32
+; UNROLL3-NEXT:    s_cmp_eq_u64 s[4:5], s[6:7]
+; UNROLL3-NEXT:    s_cbranch_scc0 .LBB9_6
+; UNROLL3-NEXT:  .LBB9_7: ; %Flow9
+; UNROLL3-NEXT:    s_inst_prefetch 0x2
+; UNROLL3-NEXT:    s_or_b32 exec_lo, exec_lo, s8
+; UNROLL3-NEXT:    s_waitcnt lgkmcnt(0)
+; UNROLL3-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  tail call void @llvm.memmove.p0.p5.i64(ptr addrspace(0) noundef nonnull align 1 %dst, ptr addrspace(5) noundef nonnull align 1 %src, i64 2048, i1 false)
+  ret void
+}
+
+
+declare void @llvm.memcpy.p0.p0.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(0) noalias nocapture readonly, i64, i1 immarg) #2 ; memcpy variants: the pN.pM suffix matches the dst/src address spaces, i64 is the length type
+declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2
+declare void @llvm.memcpy.p0.p4.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2
+declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
+
+declare void @llvm.memcpy.p0.p5.i64(ptr addrspace(0) noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2
+
+declare void @llvm.memmove.p0.p0.i64(ptr addrspace(0) nocapture writeonly, ptr addrspace(0) nocapture readonly, i64, i1 immarg) #2 ; memmove variants: same address-space pairs as the memcpy set, but the pointers are not marked noalias
+declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #2
+declare void @llvm.memmove.p0.p4.i64(ptr addrspace(0) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1 immarg) #2
+declare void @llvm.memmove.p5.p5.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #2
+
+declare void @llvm.memmove.p0.p5.i64(ptr addrspace(0) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #2
+
+attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ; attribute group referenced as #2 by the intrinsic declarations above
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll
new file mode 100644
index 000000000000000..b0d578e421e280c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+; Check that invalid IR is not produced on a vector typed
+; getelementptr with a scalar alloca pointer base.
+
+define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() {
+; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[GETELEMENTPTR]], i64 0
+; CHECK-NEXT:    store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %alloca = alloca i32, align 4, addrspace(5)
+  %getelementptr = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+  %extractelement = extractelement <4 x ptr addrspace(5)> %getelementptr, i64 0
+  store i32 0, ptr addrspace(5) %extractelement
+  ret void
+}
+
+define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(
+; CHECK-SAME: i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[BB:.*:]]
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT:    [[GETELEMENTPTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> <i64 3, i64 2, i64 1, i64 0>
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(5)> [[GETELEMENTPTR0]], <4 x ptr addrspace(5)> [[GETELEMENTPTR1]]
+; CHECK-NEXT:    [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[SELECT]], i64 1
+; CHECK-NEXT:    store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4
+; CHECK-NEXT:    ret void
+;
+bb:
+  %alloca = alloca i32, align 4, addrspace(5)
+  %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
+  %getelementptr1 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> <i64 3, i64 2, i64 1, i64 0>
+  %select = select i1 %cond, <4 x ptr addrspace(5)> %getelementptr0, <4 x ptr addrspace(5)> %getelementptr1
+  %extractelement = extractelement <4 x ptr addrspace(5)> %select, i64 1
+  store i32 0, ptr addrspace(5) %extractelement
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir b/llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir
index c23c8900096fba1..be1a8aceb8c9032 100644
--- a/llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir
+++ b/llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=early-tailduplication -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes=early-tailduplication -o - %s | FileCheck %s
 
 ---
 name:            stop_duplicate_cfg_intrinsic
diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics.ll b/llvm/test/CodeGen/ARM/fp-intrinsics.ll
index e286eb3226e46f8..ca2dc701bd1fb37 100644
--- a/llvm/test/CodeGen/ARM/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/ARM/fp-intrinsics.ll
@@ -68,7 +68,7 @@ define float @fma_f32(float %x, float %y, float %z) #0 {
 ; CHECK-NOSP: bl __aeabi_f2iz
 ; CHECK-SP: vcvt.s32.f32
 define i32 @fptosi_f32(float %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.fptosi.f32(float %x, metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
@@ -79,9 +79,9 @@ define i32 @fptosi_f32(float %x) #0 {
 ; FIXME-CHECK-SP: vcvt.s32.f32
 define void @fptosi_f32_twice(float %arg, ptr %ptr) #0 {
 entry:
-  %conv = call i32 @llvm.experimental.constrained.fptosi.f32(float %arg, metadata !"fpexcept.strict") #0
+  %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %arg, metadata !"fpexcept.strict") #0
   store i32 %conv, ptr %ptr, align 4
-  %conv1 = call i32 @llvm.experimental.constrained.fptosi.f32(float %arg, metadata !"fpexcept.strict") #0
+  %conv1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %arg, metadata !"fpexcept.strict") #0
   %idx = getelementptr inbounds i32, ptr %ptr, i32 1
   store i32 %conv1, ptr %idx, align 4
   ret void
@@ -91,7 +91,7 @@ entry:
 ; CHECK-NOSP: bl __aeabi_f2uiz
 ; FIXME-CHECK-SP: vcvt.u32.f32
 define i32 @fptoui_f32(float %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.fptoui.f32(float %x, metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
@@ -102,9 +102,9 @@ define i32 @fptoui_f32(float %x) #0 {
 ; FIXME-CHECK-SP: vcvt.u32.f32
 define void @fptoui_f32_twice(float %arg, ptr %ptr) #0 {
 entry:
-  %conv = call i32 @llvm.experimental.constrained.fptoui.f32(float %arg, metadata !"fpexcept.strict") #0
+  %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %arg, metadata !"fpexcept.strict") #0
   store i32 %conv, ptr %ptr, align 4
-  %conv1 = call i32 @llvm.experimental.constrained.fptoui.f32(float %arg, metadata !"fpexcept.strict") #0
+  %conv1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %arg, metadata !"fpexcept.strict") #0
   %idx = getelementptr inbounds i32, ptr %ptr, i32 1
   store i32 %conv1, ptr %idx, align 4
   ret void
@@ -209,14 +209,14 @@ define float @nearbyint_f32(float %x) #0 {
 ; CHECK-LABEL: lrint_f32:
 ; CHECK: bl lrintf
 define i32 @lrint_f32(float %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.lrint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.lrint.i32.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
 ; CHECK-LABEL: llrint_f32:
 ; CHECK: bl llrintf
 define i32 @llrint_f32(float %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.llrint.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.llrint.i32.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
@@ -259,14 +259,14 @@ define float @floor_f32(float %x) #0 {
 ; CHECK-LABEL: lround_f32:
 ; CHECK: bl lroundf
 define i32 @lround_f32(float %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.lround.f32(float %x, metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.lround.i32.f32(float %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
 ; CHECK-LABEL: llround_f32:
 ; CHECK: bl llroundf
 define i32 @llround_f32(float %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.llround.f32(float %x, metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.llround.i32.f32(float %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
@@ -562,7 +562,7 @@ define double @fma_f64(double %x, double %y, double %z) #0 {
 ; CHECK-NODP: bl __aeabi_d2iz
 ; CHECK-DP: vcvt.s32.f64
 define i32 @fptosi_f64(double %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.fptosi.f64(double %x, metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
@@ -570,7 +570,7 @@ define i32 @fptosi_f64(double %x) #0 {
 ; CHECK-NODP: bl __aeabi_d2uiz
 ; FIXME-CHECK-DP: vcvt.u32.f64
 define i32 @fptoui_f64(double %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.fptoui.f64(double %x, metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
@@ -673,14 +673,14 @@ define double @nearbyint_f64(double %x) #0 {
 ; CHECK-LABEL: lrint_f64:
 ; CHECK: bl lrint
 define i32 @lrint_f64(double %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.lrint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.lrint.i32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
 ; CHECK-LABEL: llrint_f64:
 ; CHECK: bl llrint
 define i32 @llrint_f64(double %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.llrint.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.llrint.i32.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
@@ -723,14 +723,14 @@ define double @floor_f64(double %x) #0 {
 ; CHECK-LABEL: lround_f64:
 ; CHECK: bl lround
 define i32 @lround_f64(double %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.lround.f64(double %x, metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.lround.i32.f64(double %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
 ; CHECK-LABEL: llround_f64:
 ; CHECK: bl llround
 define i32 @llround_f64(double %x) #0 {
-  %val = call i32 @llvm.experimental.constrained.llround.f64(double %x, metadata !"fpexcept.strict") #0
+  %val = call i32 @llvm.experimental.constrained.llround.i32.f64(double %x, metadata !"fpexcept.strict") #0
   ret i32 %val
 }
 
@@ -1031,8 +1031,8 @@ declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, me
 declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
 declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata)
 declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
-declare i32 @llvm.experimental.constrained.fptosi.f32(float, metadata)
-declare i32 @llvm.experimental.constrained.fptoui.f32(float, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata)
 declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata)
 declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata)
@@ -1046,14 +1046,14 @@ declare float @llvm.experimental.constrained.exp.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.exp2.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
-declare i32 @llvm.experimental.constrained.lrint.f32(float, metadata, metadata)
-declare i32 @llvm.experimental.constrained.llrint.f32(float, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.i32.f32(float, metadata, metadata)
+declare i32 @llvm.experimental.constrained.llrint.i32.f32(float, metadata, metadata)
 declare float @llvm.experimental.constrained.maxnum.f32(float, float, metadata)
 declare float @llvm.experimental.constrained.minnum.f32(float, float, metadata)
 declare float @llvm.experimental.constrained.ceil.f32(float, metadata)
 declare float @llvm.experimental.constrained.floor.f32(float, metadata)
-declare i32 @llvm.experimental.constrained.lround.f32(float, metadata)
-declare i32 @llvm.experimental.constrained.llround.f32(float, metadata)
+declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
+declare i32 @llvm.experimental.constrained.llround.i32.f32(float, metadata)
 declare float @llvm.experimental.constrained.round.f32(float, metadata)
 declare float @llvm.experimental.constrained.trunc.f32(float, metadata)
 declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata)
@@ -1065,8 +1065,8 @@ declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata,
 declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
 declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata)
 declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
-declare i32 @llvm.experimental.constrained.fptosi.f64(double, metadata)
-declare i32 @llvm.experimental.constrained.fptoui.f64(double, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
 declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata)
 declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata)
@@ -1080,14 +1080,14 @@ declare double @llvm.experimental.constrained.exp.f64(double, metadata, metadata
 declare double @llvm.experimental.constrained.exp2.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
-declare i32 @llvm.experimental.constrained.lrint.f64(double, metadata, metadata)
-declare i32 @llvm.experimental.constrained.llrint.f64(double, metadata, metadata)
+declare i32 @llvm.experimental.constrained.lrint.i32.f64(double, metadata, metadata)
+declare i32 @llvm.experimental.constrained.llrint.i32.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.maxnum.f64(double, double, metadata)
 declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata)
 declare double @llvm.experimental.constrained.ceil.f64(double, metadata)
 declare double @llvm.experimental.constrained.floor.f64(double, metadata)
-declare i32 @llvm.experimental.constrained.lround.f64(double, metadata)
-declare i32 @llvm.experimental.constrained.llround.f64(double, metadata)
+declare i32 @llvm.experimental.constrained.lround.i32.f64(double, metadata)
+declare i32 @llvm.experimental.constrained.llround.i32.f64(double, metadata)
 declare double @llvm.experimental.constrained.round.f64(double, metadata)
 declare double @llvm.experimental.constrained.trunc.f64(double, metadata)
 declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
diff --git a/llvm/test/CodeGen/ARM/llvm.sincos.ll b/llvm/test/CodeGen/ARM/llvm.sincos.ll
new file mode 100644
index 000000000000000..9628405df6bcb9e
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/llvm.sincos.ll
@@ -0,0 +1,223 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=thumbv7-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+
+define { half, half } @test_sincos_f16(half %a) {
+; CHECK-LABEL: test_sincos_f16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr r0, [sp, #4]
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    ldr r0, [sp]
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    mov r1, r0
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    pop {r4, pc}
+  %result = call { half, half } @llvm.sincos.f16(half %a)
+  ret { half, half } %result
+}
+
+define half @test_sincos_f16_only_use_sin(half %a) {
+; CHECK-LABEL: test_sincos_f16_only_use_sin:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr r0, [sp, #4]
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    pop {r7, pc}
+  %result = call { half, half } @llvm.sincos.f16(half %a)
+  %result.0 = extractvalue { half, half } %result, 0
+  ret half %result.0
+}
+
+define half @test_sincos_f16_only_use_cos(half %a) {
+; CHECK-LABEL: test_sincos_f16_only_use_cos:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr r0, [sp]
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    pop {r7, pc}
+  %result = call { half, half } @llvm.sincos.f16(half %a)
+  %result.1 = extractvalue { half, half } %result, 1
+  ret half %result.1
+}
+
+define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_sincos_v2f16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    vpush {d8}
+; CHECK-NEXT:    sub sp, #24
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    add r1, sp, #12
+; CHECK-NEXT:    add r2, sp, #8
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    mov r0, r4
+; CHECK-NEXT:    bl __gnu_h2f_ieee
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr r0, [sp, #12]
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    ldr r1, [sp, #4]
+; CHECK-NEXT:    strh.w r0, [sp, #22]
+; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    strh.w r0, [sp, #20]
+; CHECK-NEXT:    add r0, sp, #20
+; CHECK-NEXT:    vld1.32 {d8[0]}, [r0:32]
+; CHECK-NEXT:    ldr r0, [sp, #8]
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    ldr r1, [sp]
+; CHECK-NEXT:    strh.w r0, [sp, #18]
+; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    bl __gnu_f2h_ieee
+; CHECK-NEXT:    strh.w r0, [sp, #16]
+; CHECK-NEXT:    add r0, sp, #16
+; CHECK-NEXT:    vmovl.u16 q9, d8
+; CHECK-NEXT:    vld1.32 {d16[0]}, [r0:32]
+; CHECK-NEXT:    vmovl.u16 q8, d16
+; CHECK-NEXT:    vmov.32 r0, d18[0]
+; CHECK-NEXT:    vmov.32 r1, d18[1]
+; CHECK-NEXT:    vmov.32 r2, d16[0]
+; CHECK-NEXT:    vmov.32 r3, d16[1]
+; CHECK-NEXT:    add sp, #24
+; CHECK-NEXT:    vpop {d8}
+; CHECK-NEXT:    pop {r4, pc}
+  %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+  ret { <2 x half>, <2 x half> } %result
+}
+
+define { float, float } @test_sincos_f32(float %a) {
+; CHECK-LABEL: test_sincos_f32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldrd r1, r0, [sp], #8
+; CHECK-NEXT:    pop {r7, pc}
+  %result = call { float, float } @llvm.sincos.f32(float %a)
+  ret { float, float } %result
+}
+
+define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_sincos_v2f32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    vpush {d8}
+; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    vmov d8, r0, r1
+; CHECK-NEXT:    add r1, sp, #4
+; CHECK-NEXT:    mov r2, sp
+; CHECK-NEXT:    vmov r0, s17
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    vmov r0, s16
+; CHECK-NEXT:    add r1, sp, #12
+; CHECK-NEXT:    add r2, sp, #8
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    vldr s1, [sp, #4]
+; CHECK-NEXT:    vldr s3, [sp]
+; CHECK-NEXT:    vldr s0, [sp, #12]
+; CHECK-NEXT:    vldr s2, [sp, #8]
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    add sp, #16
+; CHECK-NEXT:    vpop {d8}
+; CHECK-NEXT:    pop {r7, pc}
+  %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+  ret { <2 x float>, <2 x float> } %result
+}
+
+define { double, double } @test_sincos_f64(double %a) {
+; CHECK-LABEL: test_sincos_f64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r7, lr}
+; CHECK-NEXT:    sub sp, #16
+; CHECK-NEXT:    add r2, sp, #8
+; CHECK-NEXT:    mov r3, sp
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldrd r0, r1, [sp, #8]
+; CHECK-NEXT:    ldrd r2, r3, [sp], #16
+; CHECK-NEXT:    pop {r7, pc}
+  %result = call { double, double } @llvm.sincos.f64(double %a)
+  ret { double, double } %result
+}
+
+define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_sincos_v2f64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    sub sp, #32
+; CHECK-NEXT:    mov r1, r3
+; CHECK-NEXT:    mov r12, r2
+; CHECK-NEXT:    add r2, sp, #24
+; CHECK-NEXT:    add r3, sp, #16
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    mov r0, r12
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldrd r0, r1, [sp, #40]
+; CHECK-NEXT:    add r2, sp, #8
+; CHECK-NEXT:    mov r3, sp
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    vldr d19, [sp, #8]
+; CHECK-NEXT:    vldr d18, [sp, #24]
+; CHECK-NEXT:    vldr d17, [sp]
+; CHECK-NEXT:    vldr d16, [sp, #16]
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r4]!
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r4]
+; CHECK-NEXT:    add sp, #32
+; CHECK-NEXT:    pop {r4, pc}
+  %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+  ret { <2 x double>, <2 x double> } %result
+}
+
+define { fp128, fp128 } @test_sincos_f128(fp128 %a) {
+; CHECK-LABEL: test_sincos_f128:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    push {r4, r5, r7, lr}
+; CHECK-NEXT:    sub sp, #40
+; CHECK-NEXT:    mov r12, r3
+; CHECK-NEXT:    ldr r3, [sp, #56]
+; CHECK-NEXT:    add.w lr, sp, #8
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    add r0, sp, #24
+; CHECK-NEXT:    strd r0, lr, [sp]
+; CHECK-NEXT:    mov r0, r1
+; CHECK-NEXT:    mov r1, r2
+; CHECK-NEXT:    mov r2, r12
+; CHECK-NEXT:    bl sincosl
+; CHECK-NEXT:    ldrd r2, r3, [sp, #16]
+; CHECK-NEXT:    ldrd r12, r1, [sp, #8]
+; CHECK-NEXT:    str r3, [r4, #28]
+; CHECK-NEXT:    ldrd r3, r5, [sp, #32]
+; CHECK-NEXT:    ldrd lr, r0, [sp, #24]
+; CHECK-NEXT:    strd r1, r2, [r4, #20]
+; CHECK-NEXT:    add.w r1, r4, #8
+; CHECK-NEXT:    stm.w r1, {r3, r5, r12}
+; CHECK-NEXT:    strd lr, r0, [r4]
+; CHECK-NEXT:    add sp, #40
+; CHECK-NEXT:    pop {r4, r5, r7, pc}
+  %result = call { fp128, fp128 } @llvm.sincos.f128(fp128 %a)
+  ret { fp128, fp128 } %result
+}
diff --git a/llvm/test/CodeGen/DirectX/countbits.ll b/llvm/test/CodeGen/DirectX/countbits.ll
index c6bc2b6790948ee..f03ab9c5e79c354 100644
--- a/llvm/test/CodeGen/DirectX/countbits.ll
+++ b/llvm/test/CodeGen/DirectX/countbits.ll
@@ -4,35 +4,67 @@
 
 define noundef i16 @test_countbits_short(i16 noundef %a) {
 entry:
-; CHECK: call i16 @dx.op.unary.i16(i32 31, i16 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}})
+; CHECK-NEXT: [[B:%.*]] = trunc i32 [[A]] to i16
+; CHECK-NEXT: ret i16 [[B]]
   %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
   ret i16 %elt.ctpop
 }
 
+define noundef i32 @test_countbits_short2(i16 noundef %a) {
+entry:
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}})
+; CHECK-NEXT: ret i32 [[A]]
+  %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+  %elt.zext = zext i16 %elt.ctpop to i32
+  ret i32 %elt.zext
+}
+
+define noundef i32 @test_countbits_short3(i16 noundef %a) {
+entry:
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i16(i32 31, i16 %{{.*}})
+; CHECK-NEXT: ret i32 [[A]]
+  %elt.ctpop = call i16 @llvm.ctpop.i16(i16 %a)
+  %elt.sext = sext i16 %elt.ctpop to i32
+  ret i32 %elt.sext
+}
+
 define noundef i32 @test_countbits_int(i32 noundef %a) {
 entry:
-; CHECK: call i32 @dx.op.unary.i32(i32 31, i32 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 %{{.*}})
+; CHECK-NEXT: ret i32 [[A]]
   %elt.ctpop = call i32 @llvm.ctpop.i32(i32 %a)
   ret i32 %elt.ctpop
 }
 
 define noundef i64 @test_countbits_long(i64 noundef %a) {
 entry:
-; CHECK: call i64 @dx.op.unary.i64(i32 31, i64 %{{.*}})
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i64(i32 31, i64 %{{.*}})
+; CHECK-NEXT: [[B:%.*]] = zext i32 [[A]] to i64
+; CHECK-NEXT: ret i64 [[B]]
   %elt.ctpop = call i64 @llvm.ctpop.i64(i64 %a)
   ret i64 %elt.ctpop
 }
 
+define noundef i32 @test_countbits_long2(i64 noundef %a) {
+entry:
+; CHECK: [[A:%.*]] = call i32 @dx.op.unaryBits.i64(i32 31, i64 %{{.*}})
+; CHECK-NEXT: ret i32 [[A]]
+  %elt.ctpop = call i64 @llvm.ctpop.i64(i64 %a)
+  %elt.trunc = trunc i64 %elt.ctpop to i32
+  ret i32 %elt.trunc
+}
+
 define noundef <4 x i32> @countbits_vec4_i32(<4 x i32> noundef %a)  {
 entry:
   ; CHECK: [[ee0:%.*]] = extractelement <4 x i32> %a, i64 0
-  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee0]])
+  ; CHECK: [[ie0:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee0]])
   ; CHECK: [[ee1:%.*]] = extractelement <4 x i32> %a, i64 1
-  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee1]])
+  ; CHECK: [[ie1:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee1]])
   ; CHECK: [[ee2:%.*]] = extractelement <4 x i32> %a, i64 2
-  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee2]])
+  ; CHECK: [[ie2:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee2]])
   ; CHECK: [[ee3:%.*]] = extractelement <4 x i32> %a, i64 3
-  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unary.i32(i32 31, i32 [[ee3]])
+  ; CHECK: [[ie3:%.*]] = call i32 @dx.op.unaryBits.i32(i32 31, i32 [[ee3]])
   ; CHECK: insertelement <4 x i32> poison, i32 [[ie0]], i64 0
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie1]], i64 1
   ; CHECK: insertelement <4 x i32> %{{.*}}, i32 [[ie2]], i64 2
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
new file mode 100644
index 000000000000000..baf93d4e177f0fa
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
@@ -0,0 +1,8 @@
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s
+
+define void @test_group_memory_barrier_with_group_sync() {
+entry:
+  ; CHECK: call void @dx.op.barrier(i32 80, i32 9)
+  call void @llvm.dx.group.memory.barrier.with.group.sync()
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/DirectX/split-double.ll b/llvm/test/CodeGen/DirectX/split-double.ll
deleted file mode 100644
index 759590fa56279b1..000000000000000
--- a/llvm/test/CodeGen/DirectX/split-double.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -passes='function(scalarizer)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
-
-define void @test_vector_double_split_void(<2 x double> noundef %d) {
-; CHECK-LABEL: define void @test_vector_double_split_void(
-; CHECK-SAME: <2 x double> noundef [[D:%.*]]) {
-; CHECK-NEXT:    [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0
-; CHECK-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]])
-; CHECK-NEXT:    [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1
-; CHECK-NEXT:    [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]])
-; CHECK-NEXT:    ret void
-;
-  %hlsl.asuint = call { <2 x i32>, <2 x i32> }  @llvm.dx.splitdouble.v2i32(<2 x double> %d)
-  ret void
-}
-
-define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) {
-; CHECK-LABEL: define noundef <3 x i32> @test_vector_double_split(
-; CHECK-SAME: <3 x double> noundef [[D:%.*]]) {
-; CHECK-NEXT:    [[D_I0:%.*]] = extractelement <3 x double> [[D]], i64 0
-; CHECK-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]])
-; CHECK-NEXT:    [[D_I1:%.*]] = extractelement <3 x double> [[D]], i64 1
-; CHECK-NEXT:    [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]])
-; CHECK-NEXT:    [[D_I2:%.*]] = extractelement <3 x double> [[D]], i64 2
-; CHECK-NEXT:    [[HLSL_ASUINT_I2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I2]])
-; CHECK-NEXT:    [[DOTELEM0:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0
-; CHECK-NEXT:    [[DOTELEM01:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 0
-; CHECK-NEXT:    [[DOTELEM02:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 0
-; CHECK-NEXT:    [[DOTELEM1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1
-; CHECK-NEXT:    [[DOTELEM13:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 1
-; CHECK-NEXT:    [[DOTELEM14:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 1
-; CHECK-NEXT:    [[DOTI0:%.*]] = add i32 [[DOTELEM0]], [[DOTELEM1]]
-; CHECK-NEXT:    [[DOTI1:%.*]] = add i32 [[DOTELEM01]], [[DOTELEM13]]
-; CHECK-NEXT:    [[DOTI2:%.*]] = add i32 [[DOTELEM02]], [[DOTELEM14]]
-; CHECK-NEXT:    [[DOTUPTO015:%.*]] = insertelement <3 x i32> poison, i32 [[DOTI0]], i64 0
-; CHECK-NEXT:    [[DOTUPTO116:%.*]] = insertelement <3 x i32> [[DOTUPTO015]], i32 [[DOTI1]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x i32> [[DOTUPTO116]], i32 [[DOTI2]], i64 2
-; CHECK-NEXT:    ret <3 x i32> [[TMP1]]
-;
-  %hlsl.asuint = call { <3 x i32>, <3 x i32> }  @llvm.dx.splitdouble.v3i32(<3 x double> %d)
-  %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0
-  %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1
-  %3 = add <3 x i32> %1, %2
-  ret <3 x i32> %3
-}
diff --git a/llvm/test/CodeGen/DirectX/splitdouble.ll b/llvm/test/CodeGen/DirectX/splitdouble.ll
new file mode 100644
index 000000000000000..1443ba6269255a9
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/splitdouble.ll
@@ -0,0 +1,76 @@
+; RUN: opt -passes='function(scalarizer)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,NOLOWER
+; RUN: opt -passes='function(scalarizer),module(dxil-op-lower)' -S -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s --check-prefixes=CHECK,WITHLOWER
+
+define i32 @test_scalar(double noundef %D) {
+; CHECK-LABEL: define i32 @test_scalar(
+; CHECK-SAME: double noundef [[D:%.*]]) {
+; NOLOWER-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D]])
+; NOLOWER-NEXT:    [[EV1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0
+; NOLOWER-NEXT:    [[EV2:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1
+; WITHLOWER-NEXT:  [[EV1:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 0
+; WITHLOWER-NEXT:  [[EV2:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 1
+; CHECK-NEXT:      [[ADD:%.*]] = add i32 [[EV1]], [[EV2]]
+; CHECK-NEXT:      ret i32 [[ADD]]
+;
+  %hlsl.splitdouble = call { i32, i32 } @llvm.dx.splitdouble.i32(double %D)
+  %1 = extractvalue { i32, i32 } %hlsl.splitdouble, 0
+  %2 = extractvalue { i32, i32 } %hlsl.splitdouble, 1
+  %add = add i32 %1, %2
+  ret i32 %add
+}
+
+
+define void @test_vector_double_split_void(<2 x double> noundef %d) {
+; CHECK-LABEL: define void @test_vector_double_split_void(
+; CHECK-SAME: <2 x double> noundef [[D:%.*]]) {
+; CHECK-NEXT:      [[D_I0:%.*]] = extractelement <2 x double> [[D]], i64 0
+; NOLOWER-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]])
+; CHECK-NEXT:      [[D_I1:%.*]] = extractelement <2 x double> [[D]], i64 1
+; NOLOWER-NEXT:    [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]])
+; CHECK-NEXT:      ret void
+;
+  %hlsl.asuint = call { <2 x i32>, <2 x i32> }  @llvm.dx.splitdouble.v2i32(<2 x double> %d)
+  ret void
+}
+
+define noundef <3 x i32> @test_vector_double_split(<3 x double> noundef %d) {
+; CHECK-LABEL: define noundef <3 x i32> @test_vector_double_split(
+; CHECK-SAME: <3 x double> noundef [[D:%.*]]) {
+; CHECK-NEXT:      [[D_I0:%.*]] = extractelement <3 x double> [[D]], i64 0
+; NOLOWER-NEXT:    [[HLSL_ASUINT_I0:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I0]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I0:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I0]])
+; CHECK-NEXT:      [[D_I1:%.*]] = extractelement <3 x double> [[D]], i64 1
+; NOLOWER-NEXT:    [[HLSL_ASUINT_I1:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I1]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I1:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I1]])
+; CHECK-NEXT:      [[D_I2:%.*]] = extractelement <3 x double> [[D]], i64 2
+; NOLOWER-NEXT:    [[HLSL_ASUINT_I2:%.*]] = call { i32, i32 } @llvm.dx.splitdouble.i32(double [[D_I2]])
+; WITHLOWER-NEXT:  [[HLSL_ASUINT_I2:%.*]] = call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102, double [[D_I2]])
+; NOLOWER-NEXT:    [[DOTELEM0:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 0
+; WITHLOWER-NEXT:  [[DOTELEM0:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 0
+; NOLOWER-NEXT:    [[DOTELEM01:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 0
+; WITHLOWER-NEXT:  [[DOTELEM01:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I1]], 0
+; NOLOWER-NEXT:    [[DOTELEM02:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 0
+; WITHLOWER-NEXT:  [[DOTELEM02:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I2]], 0
+; NOLOWER-NEXT:    [[DOTELEM1:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I0]], 1
+; WITHLOWER-NEXT:  [[DOTELEM1:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I0]], 1
+; NOLOWER-NEXT:    [[DOTELEM13:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I1]], 1
+; WITHLOWER-NEXT:  [[DOTELEM13:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I1]], 1
+; NOLOWER-NEXT:    [[DOTELEM14:%.*]] = extractvalue { i32, i32 } [[HLSL_ASUINT_I2]], 1
+; WITHLOWER-NEXT:  [[DOTELEM14:%.*]] = extractvalue %dx.types.splitdouble [[HLSL_ASUINT_I2]], 1
+; CHECK-NEXT:      [[DOTI0:%.*]] = add i32 [[DOTELEM0]], [[DOTELEM1]]
+; CHECK-NEXT:      [[DOTI1:%.*]] = add i32 [[DOTELEM01]], [[DOTELEM13]]
+; CHECK-NEXT:      [[DOTI2:%.*]] = add i32 [[DOTELEM02]], [[DOTELEM14]]
+; CHECK-NEXT:      [[DOTUPTO015:%.*]] = insertelement <3 x i32> poison, i32 [[DOTI0]], i64 0
+; CHECK-NEXT:      [[DOTUPTO116:%.*]] = insertelement <3 x i32> [[DOTUPTO015]], i32 [[DOTI1]], i64 1
+; CHECK-NEXT:      [[TMP1:%.*]] = insertelement <3 x i32> [[DOTUPTO116]], i32 [[DOTI2]], i64 2
+; CHECK-NEXT:      ret <3 x i32> [[TMP1]]
+;
+  %hlsl.asuint = call { <3 x i32>, <3 x i32> }  @llvm.dx.splitdouble.v3i32(<3 x double> %d)
+  %1 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 0
+  %2 = extractvalue { <3 x i32>, <3 x i32> } %hlsl.asuint, 1
+  %3 = add <3 x i32> %1, %2
+  ret <3 x i32> %3
+}
diff --git a/llvm/test/CodeGen/Generic/MIRStripDebug/bundles.mir b/llvm/test/CodeGen/Generic/MIRStripDebug/bundles.mir
new file mode 100644
index 000000000000000..111c886f585cf65
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/MIRStripDebug/bundles.mir
@@ -0,0 +1,63 @@
+# RUN: llc -run-pass=mir-strip-debug -o - %s | FileCheck %s
+# RUN: llc -run-pass=mir-strip-debug,mir-debugify,mir-strip-debug -o - %s | FileCheck %s
+
+--- |
+  source_filename = "loc-only.ll"
+  
+  define i32 @test(i32 %a, i32 %b) !dbg !6 {
+    %add = add i32 %a, 2, !dbg !12
+    call void @llvm.dbg.value(metadata i32 %add, metadata !9, metadata !DIExpression()), !dbg !12
+    %sub = sub i32 %add, %b, !dbg !13
+    call void @llvm.dbg.value(metadata i32 %sub, metadata !11, metadata !DIExpression()), !dbg !13
+    ret i32 %sub, !dbg !14
+  }
+  
+  declare void @llvm.dbg.value(metadata, metadata, metadata)
+  
+  !llvm.dbg.cu = !{!0}
+  ; CHECK-NOT: !llvm.dbg.cu
+  !llvm.debugify = !{!3, !4}
+  ; CHECK-NOT: !llvm.debugify
+  !llvm.module.flags = !{!5}
+  ; CHECK-NOT: !llvm.module.flags
+
+  ; CHECK-NOT: !DI
+  !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+  !1 = !DIFile(filename: "<stdin>", directory: "/")
+  !2 = !{}
+  !3 = !{i32 3}
+  !4 = !{i32 2}
+  !5 = !{i32 2, !"Debug Info Version", i32 3}
+  !6 = distinct !DISubprogram(name: "test", linkageName: "test", scope: null, file: !1, line: 1, type: !7, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8)
+  !7 = !DISubroutineType(types: !2)
+  !8 = !{!9, !11}
+  !9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
+  !10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned)
+  !11 = !DILocalVariable(name: "2", scope: !6, file: !1, line: 2, type: !10)
+  !12 = !DILocation(line: 1, column: 1, scope: !6)
+  !13 = !DILocation(line: 2, column: 1, scope: !6)
+  !14 = !DILocation(line: 3, column: 1, scope: !6)
+
+...
+---
+name:            test
+body:             |
+  bb.1 (%ir-block.0):
+    %0:_(s32) = G_IMPLICIT_DEF
+    %1:_(s32) = G_IMPLICIT_DEF
+    BUNDLE {
+      %2:_(s32) = G_CONSTANT i32 2, debug-location !DILocation(line: 0, scope: !6)
+      %3:_(s32) = G_ADD %0, %1, debug-location !12
+    }
+
+    ; CHECK-LABEL: body:
+    ; CHECK-NOT: debug-location
+    ; CHECK-NOT: !DI
+    ; CHECK-NEXT:    bb
+    ; CHECK-NEXT:      %0:_(s32) = G_IMPLICIT_DEF{{$}}
+    ; CHECK-NEXT:      %1:_(s32) = G_IMPLICIT_DEF{{$}}
+    ; CHECK-NEXT:      BUNDLE {
+    ; CHECK-NEXT:        %2:_(s32) = G_CONSTANT i32 2{{$}}
+    ; CHECK-NEXT:        %3:_(s32) = G_ADD %0, %1{{$}}
+    ; CHECK-NEXT:      }
+...
diff --git a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir
index a63bb8452ebbe14..d2b063a057139bc 100644
--- a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir
+++ b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir
@@ -12,6 +12,11 @@
   entry:
     ret i32 2
   }
+
+  define dso_local i32 @foobar() "sign-return-address"="all" "branch-protection-pauth-lr"="true" {
+  entry:
+    ret i32 2
+  }
 ...
 ---
 #CHECK: foo
@@ -46,3 +51,21 @@ body:             |
     RET_ReallyLR implicit killed $w0
 
 ...
+---
+#CHECK: foobar
+name:            foobar
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxCallFrameSize: 0
+#CHECK:    frame-setup PACM
+#CHECK:    frame-setup PACIASP implicit-def $lr, implicit $lr, implicit $sp, pre-instr-symbol <mcsymbol >
+#CHECK:    frame-setup CFI_INSTRUCTION negate_ra_sign_state_with_pc
+#CHECK:    frame-destroy PACM
+#CHECK:    frame-destroy AUTIASP implicit-def $lr, implicit $lr, implicit $sp
+body:             |
+  bb.0.entry:
+    $w0 = MOVi32imm 2
+    RET_ReallyLR implicit killed $w0
+
+...
diff --git a/llvm/test/CodeGen/PowerPC/copysignl.ll b/llvm/test/CodeGen/PowerPC/copysignl.ll
index 427826daa2c6382..40ed3d803094f45 100644
--- a/llvm/test/CodeGen/PowerPC/copysignl.ll
+++ b/llvm/test/CodeGen/PowerPC/copysignl.ll
@@ -1,82 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=-vsx < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck %s -check-prefix=CHECK-VSX
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
 define double @foo_d_ll(ppc_fp128 %a, ppc_fp128 %b) #0 {
+; CHECK-LABEL: foo_d_ll:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fcpsgn 1, 3, 1
+; CHECK-NEXT:    blr
+;
+; CHECK-VSX-LABEL: foo_d_ll:
+; CHECK-VSX:       # %bb.0: # %entry
+; CHECK-VSX-NEXT:    xscpsgndp 1, 3, 1
+; CHECK-VSX-NEXT:    blr
 entry:
   %call = tail call ppc_fp128 @copysignl(ppc_fp128 %a, ppc_fp128 %b) #0
   %conv = fptrunc ppc_fp128 %call to double
   ret double %conv
-
-; CHECK-LABEL: @foo_d_ll
-; CHECK: fcpsgn 1, 3, 1
-; CHECK: blr
-; CHECK-VSX-LABEL: @foo_d_ll
-; CHECK-VSX: xscpsgndp 1, 3, 1
-; CHECK-VSX: blr
 }
 
 declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0
 
 define double @foo_dl(double %a, ppc_fp128 %b) #0 {
+; CHECK-LABEL: foo_dl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fcpsgn 1, 2, 1
+; CHECK-NEXT:    blr
+;
+; CHECK-VSX-LABEL: foo_dl:
+; CHECK-VSX:       # %bb.0: # %entry
+; CHECK-VSX-NEXT:    xscpsgndp 1, 2, 1
+; CHECK-VSX-NEXT:    blr
 entry:
   %conv = fptrunc ppc_fp128 %b to double
   %call = tail call double @copysign(double %a, double %conv) #0
   ret double %call
-
-; CHECK-LABEL: @foo_dl
-; CHECK: fcpsgn 1, 2, 1
-; CHECK: blr
-; CHECK-VSX-LABEL: @foo_dl
-; CHECK-VSX: xscpsgndp 1, 2, 1
-; CHECK-VSX: blr
 }
 
 declare double @copysign(double, double) #0
 
 define ppc_fp128 @foo_ll(double %a, ppc_fp128 %b) #0 {
+; CHECK-LABEL: foo_ll:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    stdu 1, -112(1)
+; CHECK-NEXT:    fmr 3, 2
+; CHECK-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; CHECK-NEXT:    std 0, 128(1)
+; CHECK-NEXT:    lfs 2, .LCPI2_0@toc@l(3)
+; CHECK-NEXT:    bl copysignl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi 1, 1, 112
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+;
+; CHECK-VSX-LABEL: foo_ll:
+; CHECK-VSX:       # %bb.0: # %entry
+; CHECK-VSX-NEXT:    mflr 0
+; CHECK-VSX-NEXT:    stdu 1, -112(1)
+; CHECK-VSX-NEXT:    fmr 3, 2
+; CHECK-VSX-NEXT:    xxlxor 2, 2, 2
+; CHECK-VSX-NEXT:    std 0, 128(1)
+; CHECK-VSX-NEXT:    bl copysignl
+; CHECK-VSX-NEXT:    nop
+; CHECK-VSX-NEXT:    addi 1, 1, 112
+; CHECK-VSX-NEXT:    ld 0, 16(1)
+; CHECK-VSX-NEXT:    mtlr 0
+; CHECK-VSX-NEXT:    blr
 entry:
   %conv = fpext double %a to ppc_fp128
   %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %b) #0
   ret ppc_fp128 %call
-
-; CHECK-LABEL: @foo_ll
-; CHECK: bl copysignl
-; CHECK: blr
-; CHECK-VSX-LABEL: @foo_ll
-; CHECK-VSX: bl copysignl
-; CHECK-VSX: blr
 }
 
 define ppc_fp128 @foo_ld(double %a, double %b) #0 {
+; CHECK-LABEL: foo_ld:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    stdu 1, -112(1)
+; CHECK-NEXT:    fmr 3, 2
+; CHECK-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; CHECK-NEXT:    std 0, 128(1)
+; CHECK-NEXT:    lfs 2, .LCPI3_0@toc@l(3)
+; CHECK-NEXT:    bl copysignl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi 1, 1, 112
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+;
+; CHECK-VSX-LABEL: foo_ld:
+; CHECK-VSX:       # %bb.0: # %entry
+; CHECK-VSX-NEXT:    mflr 0
+; CHECK-VSX-NEXT:    stdu 1, -112(1)
+; CHECK-VSX-NEXT:    fmr 3, 2
+; CHECK-VSX-NEXT:    xxlxor 2, 2, 2
+; CHECK-VSX-NEXT:    std 0, 128(1)
+; CHECK-VSX-NEXT:    bl copysignl
+; CHECK-VSX-NEXT:    nop
+; CHECK-VSX-NEXT:    addi 1, 1, 112
+; CHECK-VSX-NEXT:    ld 0, 16(1)
+; CHECK-VSX-NEXT:    mtlr 0
+; CHECK-VSX-NEXT:    blr
 entry:
   %conv = fpext double %a to ppc_fp128
   %conv1 = fpext double %b to ppc_fp128
   %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %conv1) #0
   ret ppc_fp128 %call
-
-; CHECK-LABEL: @foo_ld
-; CHECK: bl copysignl
-; CHECK: blr
-; CHECK-VSX-LABEL: @foo_ld
-; CHECK-VSX: bl copysignl
-; CHECK-VSX: blr
 }
 
 define ppc_fp128 @foo_lf(double %a, float %b) #0 {
+; CHECK-LABEL: foo_lf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    mflr 0
+; CHECK-NEXT:    stdu 1, -112(1)
+; CHECK-NEXT:    fmr 3, 2
+; CHECK-NEXT:    addis 3, 2, .LCPI4_0@toc@ha
+; CHECK-NEXT:    std 0, 128(1)
+; CHECK-NEXT:    lfs 2, .LCPI4_0@toc@l(3)
+; CHECK-NEXT:    bl copysignl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi 1, 1, 112
+; CHECK-NEXT:    ld 0, 16(1)
+; CHECK-NEXT:    mtlr 0
+; CHECK-NEXT:    blr
+;
+; CHECK-VSX-LABEL: foo_lf:
+; CHECK-VSX:       # %bb.0: # %entry
+; CHECK-VSX-NEXT:    mflr 0
+; CHECK-VSX-NEXT:    stdu 1, -112(1)
+; CHECK-VSX-NEXT:    fmr 3, 2
+; CHECK-VSX-NEXT:    xxlxor 2, 2, 2
+; CHECK-VSX-NEXT:    std 0, 128(1)
+; CHECK-VSX-NEXT:    bl copysignl
+; CHECK-VSX-NEXT:    nop
+; CHECK-VSX-NEXT:    addi 1, 1, 112
+; CHECK-VSX-NEXT:    ld 0, 16(1)
+; CHECK-VSX-NEXT:    mtlr 0
+; CHECK-VSX-NEXT:    blr
 entry:
   %conv = fpext double %a to ppc_fp128
   %conv1 = fpext float %b to ppc_fp128
   %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %conv1) #0
   ret ppc_fp128 %call
-
-; CHECK-LABEL: @foo_lf
-; CHECK: bl copysignl
-; CHECK: blr
-; CHECK-VSX-LABEL: @foo_lf
-; CHECK-VSX: bl copysignl
-; CHECK-VSX: blr
 }
 
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
index 76f3dea5b7751d4..3e8935e7d5977bf 100644
--- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll
@@ -1083,7 +1083,7 @@ define float @test_fptrunc_ppc_fp128_f32(ppc_fp128 %first) #0 {
 ; PC64-NEXT:    frsp 1, 1
 ; PC64-NEXT:    blr
 entry:
-  %fptrunc = call float @llvm.experimental.constrained.fptrunc.ppcf128.f32(
+  %fptrunc = call float @llvm.experimental.constrained.fptrunc.f32.ppcf128(
                     ppc_fp128 %first,
                     metadata !"round.dynamic",
                     metadata !"fpexcept.strict") #1
@@ -1103,7 +1103,7 @@ define double @test_fptrunc_ppc_fp128_f64(ppc_fp128 %first) #0 {
 ; PC64:       # %bb.0: # %entry
 ; PC64-NEXT:    blr
 entry:
-  %fptrunc = call double @llvm.experimental.constrained.fptrunc.ppcf128.f64(
+  %fptrunc = call double @llvm.experimental.constrained.fptrunc.f64.ppcf128(
                     ppc_fp128 %first,
                     metadata !"round.dynamic",
                     metadata !"fpexcept.strict") #1
@@ -1127,7 +1127,7 @@ define ppc_fp128 @test_fpext_ppc_fp128_f32(float %first) #0 {
 ; PC64-NEXT:    lfs 2, .LCPI26_0@toc@l(3)
 ; PC64-NEXT:    blr
 entry:
-  %fpext = call ppc_fp128 @llvm.experimental.constrained.fpext.f32.ppcf128(
+  %fpext = call ppc_fp128 @llvm.experimental.constrained.fpext.ppcf128.f32(
                     float %first,
                     metadata !"fpexcept.strict") #1
   ret ppc_fp128 %fpext
@@ -1150,7 +1150,7 @@ define ppc_fp128 @test_fpext_ppc_fp128_f64(double %first) #0 {
 ; PC64-NEXT:    lfs 2, .LCPI27_0@toc@l(3)
 ; PC64-NEXT:    blr
 entry:
-  %fpext = call ppc_fp128 @llvm.experimental.constrained.fpext.f64.ppcf128(
+  %fpext = call ppc_fp128 @llvm.experimental.constrained.fpext.ppcf128.f64(
                     double %first,
                     metadata !"fpexcept.strict") #1
   ret ppc_fp128 %fpext
@@ -1568,7 +1568,7 @@ define void @test_constrained_libcall_multichain(ptr %firstptr, ptr %result) #0
 ; PC64-NEXT:    mtlr 0
 ; PC64-NEXT:    blr
   %load = load float, ptr %firstptr
-  %first = call ppc_fp128 @llvm.experimental.constrained.fpext.f32.ppcf128(
+  %first = call ppc_fp128 @llvm.experimental.constrained.fpext.ppcf128.f32(
                     float %load,
                     metadata !"fpexcept.strict") #1
   store ppc_fp128 %first, ptr %result
@@ -1598,7 +1598,7 @@ define void @test_constrained_libcall_multichain(ptr %firstptr, ptr %result) #0
                     i32 2,
                     metadata !"round.dynamic",
                     metadata !"fpexcept.strict") #1
-  %tinypow = call float @llvm.experimental.constrained.fptrunc.ppcf128.f32(
+  %tinypow = call float @llvm.experimental.constrained.fptrunc.f32.ppcf128(
                     ppc_fp128 %powi,
                     metadata !"round.dynamic",
                     metadata !"fpexcept.strict") #1
@@ -2015,7 +2015,7 @@ define i1 @ppcq_to_s1(ppc_fp128 %a) {
 ; PC64-NEXT:    mtlr 0
 ; PC64-NEXT:    blr
 entry:
-  %conv = tail call i1 @llvm.experimental.constrained.fptosi.ppcf128.i1(ppc_fp128 %a, metadata !"fpexcept.strict") #1
+  %conv = tail call i1 @llvm.experimental.constrained.fptosi.i1.ppcf128(ppc_fp128 %a, metadata !"fpexcept.strict") #1
   ret i1 %conv
 }
 
@@ -2062,7 +2062,7 @@ define i1 @ppcq_to_u1(ppc_fp128 %a) {
 ; PC64-NEXT:    mtlr 0
 ; PC64-NEXT:    blr
 entry:
-  %conv = tail call i1 @llvm.experimental.constrained.fptoui.ppcf128.i1(ppc_fp128 %a, metadata !"fpexcept.strict") #1
+  %conv = tail call i1 @llvm.experimental.constrained.fptoui.i1.ppcf128(ppc_fp128 %a, metadata !"fpexcept.strict") #1
   ret i1 %conv
 }
 
@@ -2121,10 +2121,10 @@ declare ppc_fp128 @llvm.experimental.constrained.exp.ppcf128(ppc_fp128, metadata
 declare ppc_fp128 @llvm.experimental.constrained.exp2.ppcf128(ppc_fp128, metadata, metadata)
 declare ppc_fp128 @llvm.experimental.constrained.floor.ppcf128(ppc_fp128, metadata)
 declare ppc_fp128 @llvm.experimental.constrained.fma.ppcf128(ppc_fp128, ppc_fp128, ppc_fp128, metadata, metadata)
-declare ppc_fp128 @llvm.experimental.constrained.fpext.f32.ppcf128(float, metadata)
-declare ppc_fp128 @llvm.experimental.constrained.fpext.f64.ppcf128(double, metadata)
-declare float @llvm.experimental.constrained.fptrunc.ppcf128.f32(ppc_fp128, metadata, metadata)
-declare double @llvm.experimental.constrained.fptrunc.ppcf128.f64(ppc_fp128, metadata, metadata)
+declare ppc_fp128 @llvm.experimental.constrained.fpext.ppcf128.f32(float, metadata)
+declare ppc_fp128 @llvm.experimental.constrained.fpext.ppcf128.f64(double, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32.ppcf128(ppc_fp128, metadata, metadata)
+declare double @llvm.experimental.constrained.fptrunc.f64.ppcf128(ppc_fp128, metadata, metadata)
 declare ppc_fp128 @llvm.experimental.constrained.log.ppcf128(ppc_fp128, metadata, metadata)
 declare ppc_fp128 @llvm.experimental.constrained.log10.ppcf128(ppc_fp128, metadata, metadata)
 declare ppc_fp128 @llvm.experimental.constrained.log2.ppcf128(ppc_fp128, metadata, metadata)
@@ -2144,10 +2144,10 @@ declare ppc_fp128 @llvm.experimental.constrained.tan.ppcf128(ppc_fp128, metadata
 declare ppc_fp128 @llvm.experimental.constrained.trunc.ppcf128(ppc_fp128, metadata)
 declare i64 @llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128, metadata)
 declare i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128, metadata)
-declare i1 @llvm.experimental.constrained.fptosi.ppcf128.i1(ppc_fp128, metadata)
+declare i1 @llvm.experimental.constrained.fptosi.i1.ppcf128(ppc_fp128, metadata)
 declare i64 @llvm.experimental.constrained.fptoui.i64.ppcf128(ppc_fp128, metadata)
 declare i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128, metadata)
-declare i1 @llvm.experimental.constrained.fptoui.ppcf128.i1(ppc_fp128, metadata)
+declare i1 @llvm.experimental.constrained.fptoui.i1.ppcf128(ppc_fp128, metadata)
 declare ppc_fp128 @llvm.experimental.constrained.sitofp.ppcf128.i32(i32, metadata, metadata)
 declare ppc_fp128 @llvm.experimental.constrained.uitofp.ppcf128.i32(i32, metadata, metadata)
 declare ppc_fp128 @llvm.experimental.constrained.sitofp.ppcf128.i64(i64, metadata, metadata)
diff --git a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
index fd5f26ba35742f3..7147257d27c4b8b 100644
--- a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
+++ b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll
@@ -129,12 +129,11 @@ entry:
 define i1 @test_ssubo_i32(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: test_ssubo_i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    sub 5, 3, 4
-; CHECK-NEXT:    cmpwi 1, 4, 0
-; CHECK-NEXT:    cmpw 5, 3
-; CHECK-NEXT:    li 3, 1
-; CHECK-NEXT:    creqv 20, 5, 0
-; CHECK-NEXT:    isel 3, 0, 3, 20
+; CHECK-NEXT:    sub 3, 3, 4
+; CHECK-NEXT:    extsw 4, 3
+; CHECK-NEXT:    xor 3, 4, 3
+; CHECK-NEXT:    addic 4, 3, -1
+; CHECK-NEXT:    subfe 3, 4, 3
 ; CHECK-NEXT:    blr
 entry:
   %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index aa27d63bfa6262f..c03108c0617e75a 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -39,6 +39,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+zicbom %s -o - | FileCheck --check-prefixes=CHECK,RV32ZICBOM %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zicboz %s -o - | FileCheck --check-prefixes=CHECK,RV32ZICBOZ %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zicbop %s -o - | FileCheck --check-prefixes=CHECK,RV32ZICBOP %s
+; RUN: llc -mtriple=riscv32 -mattr=+sha %s -o - | FileCheck --check-prefixes=CHECK,RV32SHA %s
 ; RUN: llc -mtriple=riscv32 -mattr=+shcounterenw %s -o - | FileCheck --check-prefixes=CHECK,RV32SHCOUNTERENW %s
 ; RUN: llc -mtriple=riscv32 -mattr=+shgatpa %s -o - | FileCheck --check-prefixes=CHECK,RV32SHGATPA %s
 ; RUN: llc -mtriple=riscv32 -mattr=+shvsatpa %s -o - | FileCheck --check-prefixes=CHECK,RV32SHVSATPA %s
@@ -61,6 +62,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+svbare %s -o - | FileCheck --check-prefixes=CHECK,RV32SVBARE %s
 ; RUN: llc -mtriple=riscv32 -mattr=+svnapot %s -o - | FileCheck --check-prefixes=CHECK,RV32SVNAPOT %s
 ; RUN: llc -mtriple=riscv32 -mattr=+svpbmt %s -o - | FileCheck --check-prefixes=CHECK,RV32SVPBMT %s
+; RUN: llc -mtriple=riscv32 -mattr=+svvptc %s -o - | FileCheck --check-prefixes=CHECK,RV32SVVPTC %s
 ; RUN: llc -mtriple=riscv32 -mattr=+svinval %s -o - | FileCheck --check-prefixes=CHECK,RV32SVINVAL %s
 ; RUN: llc -mtriple=riscv32 -mattr=+xcvalu %s -o - | FileCheck --check-prefix=RV32XCVALU %s
 ; RUN: llc -mtriple=riscv32 -mattr=+xcvbitmanip %s -o - | FileCheck --check-prefix=RV32XCVBITMANIP %s
@@ -129,11 +131,11 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha %s -o - | FileCheck --check-prefix=RV32ZABHA %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+experimental-zvbc32e  %s -o - | FileCheck --check-prefix=RV32ZVBC32E %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zve32x -mattr=+experimental-zvkgs  %s -o - | FileCheck --check-prefix=RV32ZVKGS %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-ssnpm  %s -o - | FileCheck --check-prefix=RV32SSNPM %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-smnpm  %s -o - | FileCheck --check-prefix=RV32SMNPM %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-smmpm %s -o - | FileCheck --check-prefix=RV32SMMPM %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-sspm %s -o - | FileCheck --check-prefix=RV32SSPM %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-supm %s -o - | FileCheck --check-prefix=RV32SUPM %s
+; RUN: llc -mtriple=riscv32 -mattr=+ssnpm  %s -o - | FileCheck --check-prefix=RV32SSNPM %s
+; RUN: llc -mtriple=riscv32 -mattr=+smnpm  %s -o - | FileCheck --check-prefix=RV32SMNPM %s
+; RUN: llc -mtriple=riscv32 -mattr=+smmpm %s -o - | FileCheck --check-prefix=RV32SMMPM %s
+; RUN: llc -mtriple=riscv32 -mattr=+sspm %s -o - | FileCheck --check-prefix=RV32SSPM %s
+; RUN: llc -mtriple=riscv32 -mattr=+supm %s -o - | FileCheck --check-prefix=RV32SUPM %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-smctr  %s -o - | FileCheck --check-prefix=RV32SMCTR %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-ssctr  %s -o - | FileCheck --check-prefix=RV32SSCTR %s
 
@@ -178,6 +180,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+zicbom %s -o - | FileCheck --check-prefixes=CHECK,RV64ZICBOM %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zicboz %s -o - | FileCheck --check-prefixes=CHECK,RV64ZICBOZ %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zicbop %s -o - | FileCheck --check-prefixes=CHECK,RV64ZICBOP %s
+; RUN: llc -mtriple=riscv64 -mattr=+sha %s -o - | FileCheck --check-prefixes=CHECK,RV64SHA %s
 ; RUN: llc -mtriple=riscv64 -mattr=+shcounterenw %s -o - | FileCheck --check-prefixes=CHECK,RV64SHCOUNTERENW %s
 ; RUN: llc -mtriple=riscv64 -mattr=+shgatpa %s -o - | FileCheck --check-prefixes=CHECK,RV64SHGATPA %s
 ; RUN: llc -mtriple=riscv64 -mattr=+shvsatpa %s -o - | FileCheck --check-prefixes=CHECK,RV64SHVSATPA %s
@@ -200,6 +203,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+svbare %s -o - | FileCheck --check-prefixes=CHECK,RV64SVBARE %s
 ; RUN: llc -mtriple=riscv64 -mattr=+svnapot %s -o - | FileCheck --check-prefixes=CHECK,RV64SVNAPOT %s
 ; RUN: llc -mtriple=riscv64 -mattr=+svpbmt %s -o - | FileCheck --check-prefixes=CHECK,RV64SVPBMT %s
+; RUN: llc -mtriple=riscv64 -mattr=+svvptc %s -o - | FileCheck --check-prefixes=CHECK,RV64SVVPTC %s
 ; RUN: llc -mtriple=riscv64 -mattr=+svinval %s -o - | FileCheck --check-prefixes=CHECK,RV64SVINVAL %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xventanacondops %s -o - | FileCheck --check-prefixes=CHECK,RV64XVENTANACONDOPS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+xsfvfwmaccqqq %s -o - | FileCheck --check-prefix=RV64XSFVFWMACCQQQ %s
@@ -272,11 +276,11 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha %s -o - | FileCheck --check-prefix=RV64ZABHA %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+experimental-zvbc32e  %s -o - | FileCheck --check-prefix=RV64ZVBC32E %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zve32x -mattr=+experimental-zvkgs  %s -o - | FileCheck --check-prefix=RV64ZVKGS %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-ssnpm  %s -o - | FileCheck --check-prefix=RV64SSNPM %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-smnpm  %s -o - | FileCheck --check-prefix=RV64SMNPM %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-smmpm %s -o - | FileCheck --check-prefix=RV64SMMPM %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-sspm %s -o - | FileCheck --check-prefix=RV64SSPM %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-supm %s -o - | FileCheck --check-prefix=RV64SUPM %s
+; RUN: llc -mtriple=riscv64 -mattr=+ssnpm  %s -o - | FileCheck --check-prefix=RV64SSNPM %s
+; RUN: llc -mtriple=riscv64 -mattr=+smnpm  %s -o - | FileCheck --check-prefix=RV64SMNPM %s
+; RUN: llc -mtriple=riscv64 -mattr=+smmpm %s -o - | FileCheck --check-prefix=RV64SMMPM %s
+; RUN: llc -mtriple=riscv64 -mattr=+sspm %s -o - | FileCheck --check-prefix=RV64SSPM %s
+; RUN: llc -mtriple=riscv64 -mattr=+supm %s -o - | FileCheck --check-prefix=RV64SUPM %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-smctr  %s -o - | FileCheck --check-prefix=RV64SMCTR %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-ssctr  %s -o - | FileCheck --check-prefix=RV64SSCTR %s
 
@@ -287,10 +291,10 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+rva20s64 %s -o - | FileCheck --check-prefix=RVA20S64 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+rva22u64 %s -o - | FileCheck --check-prefix=RVA22U64 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+rva22s64 %s -o - | FileCheck --check-prefix=RVA22S64 %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-rva23u64 %s -o - | FileCheck --check-prefix=RVA23U64 %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-rva23s64 %s -o - | FileCheck --check-prefix=RVA23S64 %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-rvb23u64 %s -o - | FileCheck --check-prefix=RVB23U64 %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-rvb23s64 %s -o - | FileCheck --check-prefix=RVB23S64 %s
+; RUN: llc -mtriple=riscv64 -mattr=+rva23u64 %s -o - | FileCheck --check-prefix=RVA23U64 %s
+; RUN: llc -mtriple=riscv64 -mattr=+rva23s64 %s -o - | FileCheck --check-prefix=RVA23S64 %s
+; RUN: llc -mtriple=riscv64 -mattr=+rvb23u64 %s -o - | FileCheck --check-prefix=RVB23U64 %s
+; RUN: llc -mtriple=riscv64 -mattr=+rvb23s64 %s -o - | FileCheck --check-prefix=RVB23S64 %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-rvm23u32 %s -o - | FileCheck --check-prefix=RVM23U32 %s
 
 ; CHECK: .attribute 4, 16
@@ -333,6 +337,7 @@
 ; RV32ZICBOM: .attribute 5, "rv32i2p1_zicbom1p0"
 ; RV32ZICBOZ: .attribute 5, "rv32i2p1_zicboz1p0"
 ; RV32ZICBOP: .attribute 5, "rv32i2p1_zicbop1p0"
+; RV32SHA: .attribute 5, "rv32i2p1_h1p0_sha1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssstateen1p0"
 ; RV32SHCOUNTERENW: .attribute 5, "rv32i2p1_shcounterenw1p0"
 ; RV32SHGATPA: .attribute 5, "rv32i2p1_shgatpa1p0"
 ; RV32SHVSATPA: .attribute 5, "rv32i2p1_shvsatpa1p0"
@@ -355,6 +360,7 @@
 ; RV32SVBARE: .attribute 5, "rv32i2p1_svbare1p0"
 ; RV32SVNAPOT: .attribute 5, "rv32i2p1_svnapot1p0"
 ; RV32SVPBMT: .attribute 5, "rv32i2p1_svpbmt1p0"
+; RV32SVVPTC: .attribute 5, "rv32i2p1_svvptc1p0"
 ; RV32SVINVAL: .attribute 5, "rv32i2p1_svinval1p0"
 ; RV32XCVALU: .attribute 5, "rv32i2p1_xcvalu1p0"
 ; RV32XCVBITMANIP: .attribute 5, "rv32i2p1_xcvbitmanip1p0"
@@ -474,6 +480,7 @@
 ; RV64ZAMA16B: .attribute 5, "rv64i2p1_zama16b1p0"
 ; RV64ZAWRS: .attribute 5, "rv64i2p1_zawrs1p0"
 ; RV64ZICBOP: .attribute 5, "rv64i2p1_zicbop1p0"
+; RV64SHA: .attribute 5, "rv64i2p1_h1p0_sha1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssstateen1p0"
 ; RV64SHCOUNTERENW: .attribute 5, "rv64i2p1_shcounterenw1p0"
 ; RV64SHGATPA: .attribute 5, "rv64i2p1_shgatpa1p0"
 ; RV64SHVSATPA: .attribute 5, "rv64i2p1_shvsatpa1p0"
@@ -496,6 +503,7 @@
 ; RV64SVBARE: .attribute 5, "rv64i2p1_svbare1p0"
 ; RV64SVNAPOT: .attribute 5, "rv64i2p1_svnapot1p0"
 ; RV64SVPBMT: .attribute 5, "rv64i2p1_svpbmt1p0"
+; RV64SVVPTC: .attribute 5, "rv64i2p1_svvptc1p0"
 ; RV64SVINVAL: .attribute 5, "rv64i2p1_svinval1p0"
 ; RV64XVENTANACONDOPS: .attribute 5, "rv64i2p1_xventanacondops1p0"
 ; RV64XSFVFWMACCQQQ: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvl32b1p0_xsfvfwmaccqqq1p0"
@@ -578,8 +586,8 @@
 ; RVA20S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zifencei2p0_zmmul1p0_za128rs1p0_ssccptr1p0_sstvala1p0_sstvecd1p0_svade1p0_svbare1p0"
 ; RVA22U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zihintpause2p0_zihpm2p0_zmmul1p0_za64rs1p0_zfhmin1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0"
 ; RVA22S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicsr2p0_zifencei2p0_zihintpause2p0_zihpm2p0_zmmul1p0_za64rs1p0_zfhmin1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_ssccptr1p0_sscounterenw1p0_sstvala1p0_sstvecd1p0_svade1p0_svbare1p0_svinval1p0_svpbmt1p0"
-; RVA23U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
-; RVA23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_h1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_ssnpm1p0_ssstateen1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0"
+; RVA23U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_supm1p0"
+; RVA23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_v1p0_h1p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zfhmin1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_zvbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvfhmin1p0_zvkb1p0_zvkt1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0_sha1p0_shcounterenw1p0_shgatpa1p0_shtvala1p0_shvsatpa1p0_shvstvala1p0_shvstvecd1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_ssnpm1p0_ssstateen1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_supm1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0"
 ; RVB23U64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0"
 ; RVB23S64: .attribute 5, "rv64i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zic64b1p0_zicbom1p0_zicbop1p0_zicboz1p0_ziccamoa1p0_ziccif1p0_zicclsm1p0_ziccrse1p0_zicntr2p0_zicond1p0_zicsr2p0_zifencei2p0_zihintntl1p0_zihintpause2p0_zihpm2p0_zimop1p0_zmmul1p0_za64rs1p0_zawrs1p0_zfa1p0_zca1p0_zcb1p0_zcmop1p0_zba1p0_zbb1p0_zbs1p0_zkt1p0_ssccptr1p0_sscofpmf1p0_sscounterenw1p0_sstc1p0_sstvala1p0_sstvecd1p0_ssu64xl1p0_svade1p0_svbare1p0_svinval1p0_svnapot1p0_svpbmt1p0"
 ; RVM23U32: .attribute 5, "rv32i2p1_m2p0_zicbop1p0_zicond1p0_zicsr2p0_zihintntl1p0_zihintpause2p0_zimop1p0_zmmul1p0_zca1p0_zcb1p0_zce1p0_zcmop1p0_zcmp1p0_zcmt1p0_zba1p0_zbb1p0_zbs1p0"
diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll
index 4cb6191e7322e9f..7e5ea173e52295b 100644
--- a/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll
+++ b/llvm/test/CodeGen/RISCV/double-intrinsics-strict.ll
@@ -57,7 +57,7 @@ define double @sqrt_f64(double %a) nounwind strictfp {
   ret double %1
 }
 
-declare double @llvm.experimental.constrained.powi.f64.i32(double, i32, metadata, metadata)
+declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, metadata)
 
 define double @powi_f64(double %a, i32 %b) nounwind strictfp {
 ; RV32IFD-LABEL: powi_f64:
@@ -116,7 +116,7 @@ define double @powi_f64(double %a, i32 %b) nounwind strictfp {
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
-  %1 = call double @llvm.experimental.constrained.powi.f64.i32(double %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
+  %1 = call double @llvm.experimental.constrained.powi.f64(double %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
   ret double %1
 }
 
diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll
index cbd84634de11c0b..7b2d38fefaacb1b 100644
--- a/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics-strict.ll
@@ -52,7 +52,7 @@ define float @sqrt_f32(float %a) nounwind strictfp {
   ret float %1
 }
 
-declare float @llvm.experimental.constrained.powi.f32.i32(float, i32, metadata, metadata)
+declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata)
 
 define float @powi_f32(float %a, i32 %b) nounwind strictfp {
 ; RV32IF-LABEL: powi_f32:
@@ -111,7 +111,7 @@ define float @powi_f32(float %a, i32 %b) nounwind strictfp {
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
-  %1 = call float @llvm.experimental.constrained.powi.f32.i32(float %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
+  %1 = call float @llvm.experimental.constrained.powi.f32(float %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
   ret float %1
 }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
index 35e269b91190257..43be8feece23c1b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -756,3 +756,104 @@ define void @lmul_8_x9() nounwind {
   %v9 = alloca <vscale x 8 x i64>
   ret void
 }
+
+define void @lmul_16_align() nounwind {
+; NOZBA-LABEL: lmul_16_align:
+; NOZBA:       # %bb.0:
+; NOZBA-NEXT:    addi sp, sp, -144
+; NOZBA-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; NOZBA-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; NOZBA-NEXT:    addi s0, sp, 144
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    li a1, 24
+; NOZBA-NEXT:    mul a0, a0, a1
+; NOZBA-NEXT:    sub sp, sp, a0
+; NOZBA-NEXT:    andi sp, sp, -128
+; NOZBA-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; NOZBA-NEXT:    vmv.v.i v8, 0
+; NOZBA-NEXT:    csrr a0, vlenb
+; NOZBA-NEXT:    add a0, sp, a0
+; NOZBA-NEXT:    addi a0, a0, 128
+; NOZBA-NEXT:    vs8r.v v8, (a0)
+; NOZBA-NEXT:    csrr a1, vlenb
+; NOZBA-NEXT:    slli a1, a1, 3
+; NOZBA-NEXT:    add a0, a0, a1
+; NOZBA-NEXT:    vs8r.v v8, (a0)
+; NOZBA-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; NOZBA-NEXT:    vmv.v.i v8, 0
+; NOZBA-NEXT:    addi a0, sp, 128
+; NOZBA-NEXT:    vs1r.v v8, (a0)
+; NOZBA-NEXT:    addi sp, s0, -144
+; NOZBA-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; NOZBA-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; NOZBA-NEXT:    addi sp, sp, 144
+; NOZBA-NEXT:    ret
+;
+; ZBA-LABEL: lmul_16_align:
+; ZBA:       # %bb.0:
+; ZBA-NEXT:    addi sp, sp, -144
+; ZBA-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; ZBA-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; ZBA-NEXT:    addi s0, sp, 144
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    slli a0, a0, 3
+; ZBA-NEXT:    sh1add a0, a0, a0
+; ZBA-NEXT:    sub sp, sp, a0
+; ZBA-NEXT:    andi sp, sp, -128
+; ZBA-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; ZBA-NEXT:    vmv.v.i v8, 0
+; ZBA-NEXT:    csrr a0, vlenb
+; ZBA-NEXT:    add a0, sp, a0
+; ZBA-NEXT:    addi a0, a0, 128
+; ZBA-NEXT:    vs8r.v v8, (a0)
+; ZBA-NEXT:    csrr a1, vlenb
+; ZBA-NEXT:    sh3add a0, a1, a0
+; ZBA-NEXT:    vs8r.v v8, (a0)
+; ZBA-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; ZBA-NEXT:    vmv.v.i v8, 0
+; ZBA-NEXT:    addi a0, sp, 128
+; ZBA-NEXT:    vs1r.v v8, (a0)
+; ZBA-NEXT:    addi sp, s0, -144
+; ZBA-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; ZBA-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; ZBA-NEXT:    addi sp, sp, 144
+; ZBA-NEXT:    ret
+;
+; NOMUL-LABEL: lmul_16_align:
+; NOMUL:       # %bb.0:
+; NOMUL-NEXT:    addi sp, sp, -144
+; NOMUL-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; NOMUL-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; NOMUL-NEXT:    addi s0, sp, 144
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    slli a0, a0, 3
+; NOMUL-NEXT:    mv a1, a0
+; NOMUL-NEXT:    slli a0, a0, 1
+; NOMUL-NEXT:    add a0, a0, a1
+; NOMUL-NEXT:    sub sp, sp, a0
+; NOMUL-NEXT:    andi sp, sp, -128
+; NOMUL-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; NOMUL-NEXT:    vmv.v.i v8, 0
+; NOMUL-NEXT:    csrr a0, vlenb
+; NOMUL-NEXT:    add a0, sp, a0
+; NOMUL-NEXT:    addi a0, a0, 128
+; NOMUL-NEXT:    vs8r.v v8, (a0)
+; NOMUL-NEXT:    csrr a1, vlenb
+; NOMUL-NEXT:    slli a1, a1, 3
+; NOMUL-NEXT:    add a0, a0, a1
+; NOMUL-NEXT:    vs8r.v v8, (a0)
+; NOMUL-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; NOMUL-NEXT:    vmv.v.i v8, 0
+; NOMUL-NEXT:    addi a0, sp, 128
+; NOMUL-NEXT:    vs1r.v v8, (a0)
+; NOMUL-NEXT:    addi sp, s0, -144
+; NOMUL-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; NOMUL-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; NOMUL-NEXT:    addi sp, sp, 144
+; NOMUL-NEXT:    ret
+  %v1 = alloca <vscale x 16 x i64>
+  %v2 = alloca <vscale x 1 x i64>
+  store <vscale x 16 x i64> zeroinitializer, ptr %v1
+  store <vscale x 1 x i64> zeroinitializer, ptr %v2
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll
index 8f1ff7ed4a11e23..3069d6011075703 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll
@@ -1,6 +1,92 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s
+
+define <1 x bfloat> @vector_compress_v1bf16(<1 x bfloat> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <1 x bfloat> @llvm.experimental.vector.compress.v1bf16(<1 x bfloat> %v, <1 x i1> %mask, <1 x bfloat> undef)
+  ret <1 x bfloat> %ret
+}
+
+define <1 x bfloat> @vector_compress_v1bf16_passthru(<1 x bfloat> %passthru, <1 x bfloat> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <1 x bfloat> @llvm.experimental.vector.compress.v1bf16(<1 x bfloat> %v, <1 x i1> %mask, <1 x bfloat> %passthru)
+  ret <1 x bfloat> %ret
+}
+
+define <2 x bfloat> @vector_compress_v2bf16(<2 x bfloat> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <2 x bfloat> @llvm.experimental.vector.compress.v2bf16(<2 x bfloat> %v, <2 x i1> %mask, <2 x bfloat> undef)
+  ret <2 x bfloat> %ret
+}
+
+define <2 x bfloat> @vector_compress_v2bf16_passthru(<2 x bfloat> %passthru, <2 x bfloat> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <2 x bfloat> @llvm.experimental.vector.compress.v2bf16(<2 x bfloat> %v, <2 x i1> %mask, <2 x bfloat> %passthru)
+  ret <2 x bfloat> %ret
+}
+
+define <4 x bfloat> @vector_compress_v4bf16(<4 x bfloat> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <4 x bfloat> @llvm.experimental.vector.compress.v4bf16(<4 x bfloat> %v, <4 x i1> %mask, <4 x bfloat> undef)
+  ret <4 x bfloat> %ret
+}
+
+define <4 x bfloat> @vector_compress_v4bf16_passthru(<4 x bfloat> %passthru, <4 x bfloat> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <4 x bfloat> @llvm.experimental.vector.compress.v4bf16(<4 x bfloat> %v, <4 x i1> %mask, <4 x bfloat> %passthru)
+  ret <4 x bfloat> %ret
+}
+
+define <8 x bfloat> @vector_compress_v8bf16(<8 x bfloat> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <8 x bfloat> @llvm.experimental.vector.compress.v8bf16(<8 x bfloat> %v, <8 x i1> %mask, <8 x bfloat> undef)
+  ret <8 x bfloat> %ret
+}
+
+define <8 x bfloat> @vector_compress_v8bf16_passthru(<8 x bfloat> %passthru, <8 x bfloat> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <8 x bfloat> @llvm.experimental.vector.compress.v8bf16(<8 x bfloat> %v, <8 x i1> %mask, <8 x bfloat> %passthru)
+  ret <8 x bfloat> %ret
+}
 
 define <1 x half> @vector_compress_v1f16(<1 x half> %v, <1 x i1> %mask) {
 ; CHECK-LABEL: vector_compress_v1f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 7bf47d42de3b951..ea4072f15712042 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -428,6 +428,33 @@ define void @buildvec_dominant0_v8i16(ptr %x) {
   ret void
 }
 
+define void @buildvec_dominant0_v8i16_with_end_element(ptr %x) {
+; CHECK-LABEL: buildvec_dominant0_v8i16_with_end_element:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 8
+; CHECK-NEXT:    li a1, 3
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <8 x i16> <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 3>, ptr %x
+  ret void
+}
+
+define void @buildvec_dominant0_v8i16_with_tail(ptr %x) {
+; CHECK-LABEL: buildvec_dominant0_v8i16_with_tail:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a1, %hi(.LCPI35_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI35_0)
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a1)
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <8 x i16> <i16 8, i16 8, i16 8, i16 8, i16 8, i16 undef, i16 2, i16 3>, ptr %x
+  ret void
+}
+
+
 define void @buildvec_dominant1_v8i16(ptr %x) {
 ; CHECK-LABEL: buildvec_dominant1_v8i16:
 ; CHECK:       # %bb.0:
@@ -494,8 +521,8 @@ define <2 x i8> @buildvec_dominant2_v2i8() {
 define void @buildvec_dominant0_v2i32(ptr %x) {
 ; RV32-LABEL: buildvec_dominant0_v2i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, %hi(.LCPI38_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI38_0)
+; RV32-NEXT:    lui a1, %hi(.LCPI40_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI40_0)
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vle32.v v8, (a1)
 ; RV32-NEXT:    vse32.v v8, (a0)
@@ -503,8 +530,8 @@ define void @buildvec_dominant0_v2i32(ptr %x) {
 ;
 ; RV64V-LABEL: buildvec_dominant0_v2i32:
 ; RV64V:       # %bb.0:
-; RV64V-NEXT:    lui a1, %hi(.LCPI38_0)
-; RV64V-NEXT:    ld a1, %lo(.LCPI38_0)(a1)
+; RV64V-NEXT:    lui a1, %hi(.LCPI40_0)
+; RV64V-NEXT:    ld a1, %lo(.LCPI40_0)(a1)
 ; RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64V-NEXT:    vmv.v.i v8, -1
 ; RV64V-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
@@ -514,8 +541,8 @@ define void @buildvec_dominant0_v2i32(ptr %x) {
 ;
 ; RV64ZVE32-LABEL: buildvec_dominant0_v2i32:
 ; RV64ZVE32:       # %bb.0:
-; RV64ZVE32-NEXT:    lui a1, %hi(.LCPI38_0)
-; RV64ZVE32-NEXT:    ld a1, %lo(.LCPI38_0)(a1)
+; RV64ZVE32-NEXT:    lui a1, %hi(.LCPI40_0)
+; RV64ZVE32-NEXT:    ld a1, %lo(.LCPI40_0)(a1)
 ; RV64ZVE32-NEXT:    li a2, -1
 ; RV64ZVE32-NEXT:    sd a1, 0(a0)
 ; RV64ZVE32-NEXT:    sd a2, 8(a0)
@@ -527,8 +554,8 @@ define void @buildvec_dominant0_v2i32(ptr %x) {
 define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize {
 ; RV32-LABEL: buildvec_dominant1_optsize_v2i32:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, %hi(.LCPI39_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI39_0)
+; RV32-NEXT:    lui a1, %hi(.LCPI41_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI41_0)
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vle32.v v8, (a1)
 ; RV32-NEXT:    vse32.v v8, (a0)
@@ -536,8 +563,8 @@ define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize {
 ;
 ; RV64V-LABEL: buildvec_dominant1_optsize_v2i32:
 ; RV64V:       # %bb.0:
-; RV64V-NEXT:    lui a1, %hi(.LCPI39_0)
-; RV64V-NEXT:    addi a1, a1, %lo(.LCPI39_0)
+; RV64V-NEXT:    lui a1, %hi(.LCPI41_0)
+; RV64V-NEXT:    addi a1, a1, %lo(.LCPI41_0)
 ; RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64V-NEXT:    vle64.v v8, (a1)
 ; RV64V-NEXT:    vse64.v v8, (a0)
@@ -545,8 +572,8 @@ define void @buildvec_dominant1_optsize_v2i32(ptr %x) optsize {
 ;
 ; RV64ZVE32-LABEL: buildvec_dominant1_optsize_v2i32:
 ; RV64ZVE32:       # %bb.0:
-; RV64ZVE32-NEXT:    lui a1, %hi(.LCPI39_0)
-; RV64ZVE32-NEXT:    ld a1, %lo(.LCPI39_0)(a1)
+; RV64ZVE32-NEXT:    lui a1, %hi(.LCPI41_0)
+; RV64ZVE32-NEXT:    ld a1, %lo(.LCPI41_0)(a1)
 ; RV64ZVE32-NEXT:    li a2, -1
 ; RV64ZVE32-NEXT:    sd a1, 0(a0)
 ; RV64ZVE32-NEXT:    sd a2, 8(a0)
@@ -604,8 +631,8 @@ define void @buildvec_seq_v8i8_v2i32(ptr %x) {
 define void @buildvec_seq_v16i8_v2i64(ptr %x) {
 ; RV32-LABEL: buildvec_seq_v16i8_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a1, %hi(.LCPI42_0)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI42_0)
+; RV32-NEXT:    lui a1, %hi(.LCPI44_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI44_0)
 ; RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; RV32-NEXT:    vle8.v v8, (a1)
 ; RV32-NEXT:    vse8.v v8, (a0)
@@ -613,8 +640,8 @@ define void @buildvec_seq_v16i8_v2i64(ptr %x) {
 ;
 ; RV64V-LABEL: buildvec_seq_v16i8_v2i64:
 ; RV64V:       # %bb.0:
-; RV64V-NEXT:    lui a1, %hi(.LCPI42_0)
-; RV64V-NEXT:    ld a1, %lo(.LCPI42_0)(a1)
+; RV64V-NEXT:    lui a1, %hi(.LCPI44_0)
+; RV64V-NEXT:    ld a1, %lo(.LCPI44_0)(a1)
 ; RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64V-NEXT:    vmv.v.x v8, a1
 ; RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
@@ -623,8 +650,8 @@ define void @buildvec_seq_v16i8_v2i64(ptr %x) {
 ;
 ; RV64ZVE32-LABEL: buildvec_seq_v16i8_v2i64:
 ; RV64ZVE32:       # %bb.0:
-; RV64ZVE32-NEXT:    lui a1, %hi(.LCPI42_0)
-; RV64ZVE32-NEXT:    addi a1, a1, %lo(.LCPI42_0)
+; RV64ZVE32-NEXT:    lui a1, %hi(.LCPI44_0)
+; RV64ZVE32-NEXT:    addi a1, a1, %lo(.LCPI44_0)
 ; RV64ZVE32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; RV64ZVE32-NEXT:    vle8.v v8, (a1)
 ; RV64ZVE32-NEXT:    vse8.v v8, (a0)
@@ -656,8 +683,8 @@ define void @buildvec_seq2_v16i8_v2i64(ptr %x) {
 ;
 ; RV64ZVE32-LABEL: buildvec_seq2_v16i8_v2i64:
 ; RV64ZVE32:       # %bb.0:
-; RV64ZVE32-NEXT:    lui a1, %hi(.LCPI43_0)
-; RV64ZVE32-NEXT:    addi a1, a1, %lo(.LCPI43_0)
+; RV64ZVE32-NEXT:    lui a1, %hi(.LCPI45_0)
+; RV64ZVE32-NEXT:    addi a1, a1, %lo(.LCPI45_0)
 ; RV64ZVE32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; RV64ZVE32-NEXT:    vle8.v v8, (a1)
 ; RV64ZVE32-NEXT:    vse8.v v8, (a0)
@@ -3384,3 +3411,33 @@ define <1 x i32> @buildvec_v1i32_pack(i32 %e1) {
   ret <1 x i32> %v1
 }
 
+define <4 x i32> @buildvec_vslide1up(i32 %e1, i32 %e2) {
+; CHECK-LABEL: buildvec_vslide1up:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
+; CHECK-NEXT:    vmv.s.x v8, a1
+; CHECK-NEXT:    ret
+  %v1 = insertelement <4 x i32> poison, i32 %e2, i32 0
+  %v2 = insertelement <4 x i32> %v1, i32 %e1, i32 1
+  %v3 = insertelement <4 x i32> %v2, i32 %e1, i32 2
+  %v4 = insertelement <4 x i32> %v3, i32 %e1, i32 3
+  ret <4 x i32> %v4
+}
+
+define <4 x i1> @buildvec_i1_splat(i1 %e1) {
+; CHECK-LABEL: buildvec_i1_splat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v1 = insertelement <4 x i1> poison, i1 %e1, i32 0
+  %v2 = insertelement <4 x i1> %v1, i1 %e1, i32 1
+  %v3 = insertelement <4 x i1> %v2, i1 %e1, i32 2
+  %v4 = insertelement <4 x i1> %v3, i1 %e1, i32 3
+  ret <4 x i1> %v4
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index 47cbb2509441ad6..5b9af1a3cfe233a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -566,3 +566,26 @@ define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
 ; ZVE32F-NEXT:    ret
   ret <128 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1>
 }
+
+define <4 x i1> @buildvec_mask_splat(i1 %e1) {
+; CHECK-LABEL: buildvec_mask_splat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andi a0, a0, 1
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+;
+; ZVE32F-LABEL: buildvec_mask_splat:
+; ZVE32F:       # %bb.0:
+; ZVE32F-NEXT:    andi a0, a0, 1
+; ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVE32F-NEXT:    vmv.v.x v8, a0
+; ZVE32F-NEXT:    vmsne.vi v0, v8, 0
+; ZVE32F-NEXT:    ret
+  %v1 = insertelement <4 x i1> poison, i1 %e1, i32 0
+  %v2 = insertelement <4 x i1> %v1, i1 %e1, i32 1
+  %v3 = insertelement <4 x i1> %v2, i1 %e1, i32 2
+  %v4 = insertelement <4 x i1> %v3, i1 %e1, i32 3
+  ret <4 x i1> %v4
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
index 351c0bab9dca893..adfae5ede7bb59a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fold-binary-reduce.ll
@@ -366,3 +366,78 @@ entry:
   ret void
 }
 declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
+
+define i64 @op_then_reduce(<4 x i64> %v, <4 x i64> %v2) {
+; CHECK-LABEL: op_then_reduce:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    vmv.s.x v10, zero
+; CHECK-NEXT:    vredsum.vs v8, v8, v10
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %rdx1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
+  %rdx2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v2)
+  %res = add i64 %rdx1, %rdx2
+  ret i64 %res
+}
+
+
+define i64 @two_reduce_scalar_bypass(<4 x i64> %v, <4 x i64> %v2) {
+; CHECK-LABEL: two_reduce_scalar_bypass:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT:    vmv.s.x v12, zero
+; CHECK-NEXT:    vredxor.vs v8, v8, v12
+; CHECK-NEXT:    vredsum.vs v8, v10, v8
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %rdx1 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %v)
+  %rdx2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v2)
+  %res = add i64 %rdx1, %rdx2
+  ret i64 %res
+}
+
+define i64 @two_reduce_scalar_bypass_zext(<4 x i64> %v, <4 x i32> %v2) {
+; CHECK-LABEL: two_reduce_scalar_bypass_zext:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v11, zero
+; CHECK-NEXT:    vredsum.vs v10, v10, v11
+; CHECK-NEXT:    vmv.x.s a0, v10
+; CHECK-NEXT:    slli a0, a0, 32
+; CHECK-NEXT:    srli a0, a0, 32
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vmv.s.x v10, a0
+; CHECK-NEXT:    vredsum.vs v8, v8, v10
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %rdx1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
+  %rdx2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v2)
+  %rdx2.zext = zext i32 %rdx2 to i64
+  %res = add i64 %rdx1, %rdx2.zext
+  ret i64 %res
+}
+
+define i64 @two_reduce_scalar_bypass_sext(<4 x i64> %v, <4 x i32> %v2) {
+; CHECK-LABEL: two_reduce_scalar_bypass_sext:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v11, zero
+; CHECK-NEXT:    vredsum.vs v10, v10, v11
+; CHECK-NEXT:    vmv.x.s a0, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
+; CHECK-NEXT:    vmv.s.x v10, a0
+; CHECK-NEXT:    vredsum.vs v8, v8, v10
+; CHECK-NEXT:    vmv.x.s a0, v8
+; CHECK-NEXT:    ret
+entry:
+  %rdx1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %v)
+  %rdx2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v2)
+  %rdx2.sext = sext i32 %rdx2 to i64
+  %res = add i64 %rdx1, %rdx2.sext
+  ret i64 %res
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr107950.ll b/llvm/test/CodeGen/RISCV/rvv/pr107950.ll
index 8384008c245fc25..9d93ed3172132fa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr107950.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr107950.ll
@@ -26,5 +26,5 @@ entry:
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
 declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr>, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i32>) #1
 
-attributes #0 = { "target-features"="+64bit,+d,+f,+relax,+v,+xsifivecdiscarddlone,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-a,-b,-c,-e,-experimental-smctr,-experimental-smmpm,-experimental-smnpm,-experimental-ssctr,-experimental-ssnpm,-experimental-sspm,-experimental-supm,-experimental-zacas,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-m,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smepmp,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zaamo,-zabha,-zalrsc,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zifencei,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
+attributes #0 = { "target-features"="+64bit,+d,+f,+relax,+v,+xsifivecdiscarddlone,+zicsr,+zve32f,+zve32x,+zve64d,+zve64f,+zve64x,+zvl128b,+zvl32b,+zvl64b,-a,-b,-c,-e,-experimental-smctr,-smmpm,-smnpm,-experimental-ssctr,-ssnpm,-sspm,-supm,-experimental-zacas,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-m,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smepmp,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zaamo,-zabha,-zalrsc,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zifencei,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-ztso,-zvbb,-zvbc,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl4096b,-zvl512b,-zvl65536b,-zvl8192b" }
 attributes #1 = { nocallback nofree nosync nounwind willreturn memory(read) }
diff --git a/llvm/test/CodeGen/RISCV/rvv/remat.ll b/llvm/test/CodeGen/RISCV/rvv/remat.ll
index 4f58ccb5188d31f..64c59769546fb74 100644
--- a/llvm/test/CodeGen/RISCV/rvv/remat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/remat.ll
@@ -1,53 +1,23 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,POSTRA
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-vsetvl-after-rvv-regalloc=false -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,PRERA
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
 
 define void @vid(ptr %p) {
-; POSTRA-LABEL: vid:
-; POSTRA:       # %bb.0:
-; POSTRA-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; POSTRA-NEXT:    vid.v v8
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vl8re64.v v16, (a0)
-; POSTRA-NEXT:    vl8re64.v v24, (a0)
-; POSTRA-NEXT:    vl8re64.v v0, (a0)
-; POSTRA-NEXT:    vl8re64.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v0, (a0)
-; POSTRA-NEXT:    vs8r.v v24, (a0)
-; POSTRA-NEXT:    vs8r.v v16, (a0)
-; POSTRA-NEXT:    vid.v v8
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    ret
-;
-; PRERA-LABEL: vid:
-; PRERA:       # %bb.0:
-; PRERA-NEXT:    addi sp, sp, -16
-; PRERA-NEXT:    .cfi_def_cfa_offset 16
-; PRERA-NEXT:    csrr a1, vlenb
-; PRERA-NEXT:    slli a1, a1, 3
-; PRERA-NEXT:    sub sp, sp, a1
-; PRERA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; PRERA-NEXT:    vid.v v8
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    addi a1, sp, 16
-; PRERA-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT:    vl8re64.v v24, (a0)
-; PRERA-NEXT:    vl8re64.v v0, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v0, (a0)
-; PRERA-NEXT:    vs8r.v v24, (a0)
-; PRERA-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    csrr a0, vlenb
-; PRERA-NEXT:    slli a0, a0, 3
-; PRERA-NEXT:    add sp, sp, a0
-; PRERA-NEXT:    addi sp, sp, 16
-; PRERA-NEXT:    ret
+; CHECK-LABEL: vid:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vl8re64.v v16, (a0)
+; CHECK-NEXT:    vl8re64.v v24, (a0)
+; CHECK-NEXT:    vl8re64.v v0, (a0)
+; CHECK-NEXT:    vl8re64.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v0, (a0)
+; CHECK-NEXT:    vs8r.v v24, (a0)
+; CHECK-NEXT:    vs8r.v v16, (a0)
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    ret
   %vid = call <vscale x 8 x i64> @llvm.riscv.vid.nxv8i64(<vscale x 8 x i64> poison, i64 -1)
   store volatile <vscale x 8 x i64> %vid, ptr %p
 
@@ -111,51 +81,22 @@ define void @vid_passthru(ptr %p, <vscale x 8 x i64> %v) {
 }
 
 define void @vmv.v.i(ptr %p) {
-; POSTRA-LABEL: vmv.v.i:
-; POSTRA:       # %bb.0:
-; POSTRA-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; POSTRA-NEXT:    vmv.v.i v8, 1
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vl8re64.v v16, (a0)
-; POSTRA-NEXT:    vl8re64.v v24, (a0)
-; POSTRA-NEXT:    vl8re64.v v0, (a0)
-; POSTRA-NEXT:    vl8re64.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v0, (a0)
-; POSTRA-NEXT:    vs8r.v v24, (a0)
-; POSTRA-NEXT:    vs8r.v v16, (a0)
-; POSTRA-NEXT:    vmv.v.i v8, 1
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    ret
-;
-; PRERA-LABEL: vmv.v.i:
-; PRERA:       # %bb.0:
-; PRERA-NEXT:    addi sp, sp, -16
-; PRERA-NEXT:    .cfi_def_cfa_offset 16
-; PRERA-NEXT:    csrr a1, vlenb
-; PRERA-NEXT:    slli a1, a1, 3
-; PRERA-NEXT:    sub sp, sp, a1
-; PRERA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; PRERA-NEXT:    vmv.v.i v8, 1
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    addi a1, sp, 16
-; PRERA-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT:    vl8re64.v v24, (a0)
-; PRERA-NEXT:    vl8re64.v v0, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v0, (a0)
-; PRERA-NEXT:    vs8r.v v24, (a0)
-; PRERA-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    csrr a0, vlenb
-; PRERA-NEXT:    slli a0, a0, 3
-; PRERA-NEXT:    add sp, sp, a0
-; PRERA-NEXT:    addi sp, sp, 16
-; PRERA-NEXT:    ret
+; CHECK-LABEL: vmv.v.i:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmv.v.i v8, 1
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vl8re64.v v16, (a0)
+; CHECK-NEXT:    vl8re64.v v24, (a0)
+; CHECK-NEXT:    vl8re64.v v0, (a0)
+; CHECK-NEXT:    vl8re64.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v0, (a0)
+; CHECK-NEXT:    vs8r.v v24, (a0)
+; CHECK-NEXT:    vs8r.v v16, (a0)
+; CHECK-NEXT:    vmv.v.i v8, 1
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    ret
   %vmv.v.i = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 1, i64 -1)
   store volatile <vscale x 8 x i64> %vmv.v.i, ptr %p
 
@@ -172,66 +113,35 @@ define void @vmv.v.i(ptr %p) {
   ret void
 }
 
-; The live range of %x needs extended down to the use of vmv.v.x at the end of
-; the block.
 define void @vmv.v.x_needs_extended(ptr %p, i64 %x) {
-; POSTRA-LABEL: vmv.v.x_needs_extended:
-; POSTRA:       # %bb.0:
-; POSTRA-NEXT:    addi sp, sp, -16
-; POSTRA-NEXT:    .cfi_def_cfa_offset 16
-; POSTRA-NEXT:    csrr a2, vlenb
-; POSTRA-NEXT:    slli a2, a2, 3
-; POSTRA-NEXT:    sub sp, sp, a2
-; POSTRA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; POSTRA-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; POSTRA-NEXT:    vmv.v.x v8, a1
-; POSTRA-NEXT:    addi a1, sp, 16
-; POSTRA-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vl8re64.v v16, (a0)
-; POSTRA-NEXT:    vl8re64.v v24, (a0)
-; POSTRA-NEXT:    vl8re64.v v0, (a0)
-; POSTRA-NEXT:    vl8re64.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v0, (a0)
-; POSTRA-NEXT:    vs8r.v v24, (a0)
-; POSTRA-NEXT:    vs8r.v v16, (a0)
-; POSTRA-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    csrr a0, vlenb
-; POSTRA-NEXT:    slli a0, a0, 3
-; POSTRA-NEXT:    add sp, sp, a0
-; POSTRA-NEXT:    addi sp, sp, 16
-; POSTRA-NEXT:    ret
-;
-; PRERA-LABEL: vmv.v.x_needs_extended:
-; PRERA:       # %bb.0:
-; PRERA-NEXT:    addi sp, sp, -16
-; PRERA-NEXT:    .cfi_def_cfa_offset 16
-; PRERA-NEXT:    csrr a2, vlenb
-; PRERA-NEXT:    slli a2, a2, 3
-; PRERA-NEXT:    sub sp, sp, a2
-; PRERA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; PRERA-NEXT:    vmv.v.x v8, a1
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    addi a1, sp, 16
-; PRERA-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT:    vl8re64.v v24, (a0)
-; PRERA-NEXT:    vl8re64.v v0, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v0, (a0)
-; PRERA-NEXT:    vs8r.v v24, (a0)
-; PRERA-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    csrr a0, vlenb
-; PRERA-NEXT:    slli a0, a0, 3
-; PRERA-NEXT:    add sp, sp, a0
-; PRERA-NEXT:    addi sp, sp, 16
-; PRERA-NEXT:    ret
+; CHECK-LABEL: vmv.v.x_needs_extended:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vl8re64.v v16, (a0)
+; CHECK-NEXT:    vl8re64.v v24, (a0)
+; CHECK-NEXT:    vl8re64.v v0, (a0)
+; CHECK-NEXT:    vl8re64.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v0, (a0)
+; CHECK-NEXT:    vs8r.v v24, (a0)
+; CHECK-NEXT:    vs8r.v v16, (a0)
+; CHECK-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
   %vmv.v.x = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 %x, i64 -1)
   store volatile <vscale x 8 x i64> %vmv.v.x, ptr %p
 
@@ -249,53 +159,23 @@ define void @vmv.v.x_needs_extended(ptr %p, i64 %x) {
 }
 
 define void @vmv.v.x_live(ptr %p, i64 %x) {
-; POSTRA-LABEL: vmv.v.x_live:
-; POSTRA:       # %bb.0:
-; POSTRA-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; POSTRA-NEXT:    vmv.v.x v8, a1
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vl8re64.v v16, (a0)
-; POSTRA-NEXT:    vl8re64.v v24, (a0)
-; POSTRA-NEXT:    vl8re64.v v0, (a0)
-; POSTRA-NEXT:    vl8re64.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v0, (a0)
-; POSTRA-NEXT:    vs8r.v v24, (a0)
-; POSTRA-NEXT:    vs8r.v v16, (a0)
-; POSTRA-NEXT:    vmv.v.x v8, a1
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    sd a1, 0(a0)
-; POSTRA-NEXT:    ret
-;
-; PRERA-LABEL: vmv.v.x_live:
-; PRERA:       # %bb.0:
-; PRERA-NEXT:    addi sp, sp, -16
-; PRERA-NEXT:    .cfi_def_cfa_offset 16
-; PRERA-NEXT:    csrr a2, vlenb
-; PRERA-NEXT:    slli a2, a2, 3
-; PRERA-NEXT:    sub sp, sp, a2
-; PRERA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
-; PRERA-NEXT:    vmv.v.x v8, a1
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    addi a2, sp, 16
-; PRERA-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; PRERA-NEXT:    vl8re64.v v24, (a0)
-; PRERA-NEXT:    vl8re64.v v0, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v0, (a0)
-; PRERA-NEXT:    vs8r.v v24, (a0)
-; PRERA-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    sd a1, 0(a0)
-; PRERA-NEXT:    csrr a0, vlenb
-; PRERA-NEXT:    slli a0, a0, 3
-; PRERA-NEXT:    add sp, sp, a0
-; PRERA-NEXT:    addi sp, sp, 16
-; PRERA-NEXT:    ret
+; CHECK-LABEL: vmv.v.x_live:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vl8re64.v v16, (a0)
+; CHECK-NEXT:    vl8re64.v v24, (a0)
+; CHECK-NEXT:    vl8re64.v v0, (a0)
+; CHECK-NEXT:    vl8re64.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v0, (a0)
+; CHECK-NEXT:    vs8r.v v24, (a0)
+; CHECK-NEXT:    vs8r.v v16, (a0)
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
   %vmv.v.x = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 %x, i64 -1)
   store volatile <vscale x 8 x i64> %vmv.v.x, ptr %p
 
@@ -314,53 +194,23 @@ define void @vmv.v.x_live(ptr %p, i64 %x) {
 }
 
 define void @vfmv.v.f(ptr %p, double %x) {
-; POSTRA-LABEL: vfmv.v.f:
-; POSTRA:       # %bb.0:
-; POSTRA-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; POSTRA-NEXT:    vfmv.v.f v8, fa0
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vl8re64.v v16, (a0)
-; POSTRA-NEXT:    vl8re64.v v24, (a0)
-; POSTRA-NEXT:    vl8re64.v v0, (a0)
-; POSTRA-NEXT:    vl8re64.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v0, (a0)
-; POSTRA-NEXT:    vs8r.v v24, (a0)
-; POSTRA-NEXT:    vs8r.v v16, (a0)
-; POSTRA-NEXT:    vfmv.v.f v8, fa0
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    fsd fa0, 0(a0)
-; POSTRA-NEXT:    ret
-;
-; PRERA-LABEL: vfmv.v.f:
-; PRERA:       # %bb.0:
-; PRERA-NEXT:    addi sp, sp, -16
-; PRERA-NEXT:    .cfi_def_cfa_offset 16
-; PRERA-NEXT:    csrr a1, vlenb
-; PRERA-NEXT:    slli a1, a1, 3
-; PRERA-NEXT:    sub sp, sp, a1
-; PRERA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; PRERA-NEXT:    vfmv.v.f v8, fa0
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    addi a1, sp, 16
-; PRERA-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT:    vl8re64.v v24, (a0)
-; PRERA-NEXT:    vl8re64.v v0, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v0, (a0)
-; PRERA-NEXT:    vs8r.v v24, (a0)
-; PRERA-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    fsd fa0, 0(a0)
-; PRERA-NEXT:    csrr a0, vlenb
-; PRERA-NEXT:    slli a0, a0, 3
-; PRERA-NEXT:    add sp, sp, a0
-; PRERA-NEXT:    addi sp, sp, 16
-; PRERA-NEXT:    ret
+; CHECK-LABEL: vfmv.v.f:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vl8re64.v v16, (a0)
+; CHECK-NEXT:    vl8re64.v v24, (a0)
+; CHECK-NEXT:    vl8re64.v v0, (a0)
+; CHECK-NEXT:    vl8re64.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v0, (a0)
+; CHECK-NEXT:    vs8r.v v24, (a0)
+; CHECK-NEXT:    vs8r.v v16, (a0)
+; CHECK-NEXT:    vfmv.v.f v8, fa0
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    fsd fa0, 0(a0)
+; CHECK-NEXT:    ret
   %vfmv.v.f = call <vscale x 8 x double> @llvm.riscv.vfmv.v.f.nxv8f64(<vscale x 8 x double> poison, double %x, i64 -1)
   store volatile <vscale x 8 x double> %vfmv.v.f, ptr %p
 
@@ -379,53 +229,23 @@ define void @vfmv.v.f(ptr %p, double %x) {
 }
 
 define void @vmv.s.x(ptr %p, i64 %x) {
-; POSTRA-LABEL: vmv.s.x:
-; POSTRA:       # %bb.0:
-; POSTRA-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; POSTRA-NEXT:    vmv.s.x v8, a1
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vl8re64.v v16, (a0)
-; POSTRA-NEXT:    vl8re64.v v24, (a0)
-; POSTRA-NEXT:    vl8re64.v v0, (a0)
-; POSTRA-NEXT:    vl8re64.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v0, (a0)
-; POSTRA-NEXT:    vs8r.v v24, (a0)
-; POSTRA-NEXT:    vs8r.v v16, (a0)
-; POSTRA-NEXT:    vmv.s.x v8, a1
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    sd a1, 0(a0)
-; POSTRA-NEXT:    ret
-;
-; PRERA-LABEL: vmv.s.x:
-; PRERA:       # %bb.0:
-; PRERA-NEXT:    addi sp, sp, -16
-; PRERA-NEXT:    .cfi_def_cfa_offset 16
-; PRERA-NEXT:    csrr a2, vlenb
-; PRERA-NEXT:    slli a2, a2, 3
-; PRERA-NEXT:    sub sp, sp, a2
-; PRERA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; PRERA-NEXT:    vmv.s.x v8, a1
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    addi a2, sp, 16
-; PRERA-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; PRERA-NEXT:    vl8re64.v v24, (a0)
-; PRERA-NEXT:    vl8re64.v v0, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v0, (a0)
-; PRERA-NEXT:    vs8r.v v24, (a0)
-; PRERA-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    sd a1, 0(a0)
-; PRERA-NEXT:    csrr a0, vlenb
-; PRERA-NEXT:    slli a0, a0, 3
-; PRERA-NEXT:    add sp, sp, a0
-; PRERA-NEXT:    addi sp, sp, 16
-; PRERA-NEXT:    ret
+; CHECK-LABEL: vmv.s.x:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, a1
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vl8re64.v v16, (a0)
+; CHECK-NEXT:    vl8re64.v v24, (a0)
+; CHECK-NEXT:    vl8re64.v v0, (a0)
+; CHECK-NEXT:    vl8re64.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v0, (a0)
+; CHECK-NEXT:    vs8r.v v24, (a0)
+; CHECK-NEXT:    vs8r.v v16, (a0)
+; CHECK-NEXT:    vmv.s.x v8, a1
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
   %vmv.s.x = call <vscale x 8 x i64> @llvm.riscv.vmv.s.x.nxv8i64(<vscale x 8 x i64> poison, i64 %x, i64 -1)
   store volatile <vscale x 8 x i64> %vmv.s.x, ptr %p
 
@@ -444,53 +264,23 @@ define void @vmv.s.x(ptr %p, i64 %x) {
 }
 
 define void @vfmv.s.f(ptr %p, double %x) {
-; POSTRA-LABEL: vfmv.s.f:
-; POSTRA:       # %bb.0:
-; POSTRA-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; POSTRA-NEXT:    vfmv.s.f v8, fa0
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vl8re64.v v16, (a0)
-; POSTRA-NEXT:    vl8re64.v v24, (a0)
-; POSTRA-NEXT:    vl8re64.v v0, (a0)
-; POSTRA-NEXT:    vl8re64.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    vs8r.v v0, (a0)
-; POSTRA-NEXT:    vs8r.v v24, (a0)
-; POSTRA-NEXT:    vs8r.v v16, (a0)
-; POSTRA-NEXT:    vfmv.s.f v8, fa0
-; POSTRA-NEXT:    vs8r.v v8, (a0)
-; POSTRA-NEXT:    fsd fa0, 0(a0)
-; POSTRA-NEXT:    ret
-;
-; PRERA-LABEL: vfmv.s.f:
-; PRERA:       # %bb.0:
-; PRERA-NEXT:    addi sp, sp, -16
-; PRERA-NEXT:    .cfi_def_cfa_offset 16
-; PRERA-NEXT:    csrr a1, vlenb
-; PRERA-NEXT:    slli a1, a1, 3
-; PRERA-NEXT:    sub sp, sp, a1
-; PRERA-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; PRERA-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; PRERA-NEXT:    vfmv.s.f v8, fa0
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    addi a1, sp, 16
-; PRERA-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; PRERA-NEXT:    vl8re64.v v24, (a0)
-; PRERA-NEXT:    vl8re64.v v0, (a0)
-; PRERA-NEXT:    vl8re64.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v0, (a0)
-; PRERA-NEXT:    vs8r.v v24, (a0)
-; PRERA-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; PRERA-NEXT:    vs8r.v v16, (a0)
-; PRERA-NEXT:    vs8r.v v8, (a0)
-; PRERA-NEXT:    fsd fa0, 0(a0)
-; PRERA-NEXT:    csrr a0, vlenb
-; PRERA-NEXT:    slli a0, a0, 3
-; PRERA-NEXT:    add sp, sp, a0
-; PRERA-NEXT:    addi sp, sp, 16
-; PRERA-NEXT:    ret
+; CHECK-LABEL: vfmv.s.f:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vl8re64.v v16, (a0)
+; CHECK-NEXT:    vl8re64.v v24, (a0)
+; CHECK-NEXT:    vl8re64.v v0, (a0)
+; CHECK-NEXT:    vl8re64.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v0, (a0)
+; CHECK-NEXT:    vs8r.v v24, (a0)
+; CHECK-NEXT:    vs8r.v v16, (a0)
+; CHECK-NEXT:    vfmv.s.f v8, fa0
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    fsd fa0, 0(a0)
+; CHECK-NEXT:    ret
   %vfmv.s.f = call <vscale x 8 x double> @llvm.riscv.vfmv.s.f.nxv8f64(<vscale x 8 x double> poison, double %x, i64 -1)
   store volatile <vscale x 8 x double> %vfmv.s.f, ptr %p
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 6700920cebff0a8..23ebfade6f6b0f8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -357,11 +357,11 @@ define <vscale x 2 x float> @vpmerge_constrained_fadd(<vscale x 2 x float> %pass
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, ma
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 ; CHECK-NEXT:    ret
-  %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
+  %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
   %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 %vl) strictfp
   ret <vscale x 2 x float> %b
 }
-declare <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float>, <vscale x 2 x float>, metadata, metadata)
+declare <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, metadata, metadata)
 declare <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i64)
 
 ; This shouldn't be folded because we need to preserve exceptions with
@@ -374,7 +374,7 @@ define <vscale x 2 x float> @vpmerge_constrained_fadd_vlmax(<vscale x 2 x float>
 ; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
 ; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 ; CHECK-NEXT:    ret
-  %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
+  %a = call <vscale x 2 x float> @llvm.experimental.constrained.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp
   %b = call <vscale x 2 x float> @llvm.riscv.vmerge.nxv2f32.nxv2f32(<vscale x 2 x float> %passthru, <vscale x 2 x float> %passthru, <vscale x 2 x float> %a, <vscale x 2 x i1> %m, i64 -1) strictfp
   ret <vscale x 2 x float> %b
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-compress.ll b/llvm/test/CodeGen/RISCV/rvv/vector-compress.ll
index 85d72ad2fe9cb4d..7516a72a92bc8bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-compress.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-compress.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh %s -o - | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfhmin,+zvfbfmin < %s | FileCheck %s
 
 ; Vector compress for i8 type
 
@@ -472,6 +474,134 @@ define <vscale x 8 x i64> @vector_compress_nxv8i64_passthru(<vscale x 8 x i64> %
   ret <vscale x 8 x i64> %ret
 }
 
+; Vector compress for bf16 type
+
+define <vscale x 1 x bfloat> @vector_compress_nxv1bf16(<vscale x 1 x bfloat> %data, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 1 x bfloat> @llvm.experimental.vector.compress.nxv1bf16(<vscale x 1 x bfloat> %data, <vscale x 1 x i1> %mask, <vscale x 1 x bfloat> undef)
+  ret <vscale x 1 x bfloat> %ret
+}
+
+define <vscale x 1 x bfloat> @vector_compress_nxv1bf16_passthru(<vscale x 1 x bfloat> %passthru, <vscale x 1 x bfloat> %data, <vscale x 1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv1bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 1 x bfloat> @llvm.experimental.vector.compress.nxv1bf16(<vscale x 1 x bfloat> %data, <vscale x 1 x i1> %mask, <vscale x 1 x bfloat> %passthru)
+  ret <vscale x 1 x bfloat> %ret
+}
+
+define <vscale x 2 x bfloat> @vector_compress_nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 2 x bfloat> @llvm.experimental.vector.compress.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
+  ret <vscale x 2 x bfloat> %ret
+}
+
+define <vscale x 2 x bfloat> @vector_compress_nxv2bf16_passthru(<vscale x 2 x bfloat> %passthru, <vscale x 2 x bfloat> %data, <vscale x 2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv2bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 2 x bfloat> @llvm.experimental.vector.compress.nxv2bf16(<vscale x 2 x bfloat> %data, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> %passthru)
+  ret <vscale x 2 x bfloat> %ret
+}
+
+define <vscale x 4 x bfloat> @vector_compress_nxv4bf16(<vscale x 4 x bfloat> %data, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 4 x bfloat> @llvm.experimental.vector.compress.nxv4bf16(<vscale x 4 x bfloat> %data, <vscale x 4 x i1> %mask, <vscale x 4 x bfloat> undef)
+  ret <vscale x 4 x bfloat> %ret
+}
+
+define <vscale x 4 x bfloat> @vector_compress_nxv4bf16_passthru(<vscale x 4 x bfloat> %passthru, <vscale x 4 x bfloat> %data, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv4bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 4 x bfloat> @llvm.experimental.vector.compress.nxv4bf16(<vscale x 4 x bfloat> %data, <vscale x 4 x i1> %mask, <vscale x 4 x bfloat> %passthru)
+  ret <vscale x 4 x bfloat> %ret
+}
+
+define <vscale x 8 x bfloat> @vector_compress_nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vcompress.vm v10, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 8 x bfloat> @llvm.experimental.vector.compress.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> undef)
+  ret <vscale x 8 x bfloat> %ret
+}
+
+define <vscale x 8 x bfloat> @vector_compress_nxv8bf16_passthru(<vscale x 8 x bfloat> %passthru, <vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv8bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v10, v0
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 8 x bfloat> @llvm.experimental.vector.compress.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %mask, <vscale x 8 x bfloat> %passthru)
+  ret <vscale x 8 x bfloat> %ret
+}
+
+define <vscale x 16 x bfloat> @vector_compress_nxv16bf16(<vscale x 16 x bfloat> %data, <vscale x 16 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT:    vcompress.vm v12, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v12
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 16 x bfloat> @llvm.experimental.vector.compress.nxv16bf16(<vscale x 16 x bfloat> %data, <vscale x 16 x i1> %mask, <vscale x 16 x bfloat> undef)
+  ret <vscale x 16 x bfloat> %ret
+}
+
+define <vscale x 16 x bfloat> @vector_compress_nxv16bf16_passthru(<vscale x 16 x bfloat> %passthru, <vscale x 16 x bfloat> %data, <vscale x 16 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv16bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v12, v0
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 16 x bfloat> @llvm.experimental.vector.compress.nxv16bf16(<vscale x 16 x bfloat> %data, <vscale x 16 x i1> %mask, <vscale x 16 x bfloat> %passthru)
+  ret <vscale x 16 x bfloat> %ret
+}
+
+define <vscale x 32 x bfloat> @vector_compress_nxv32bf16(<vscale x 32 x bfloat> %data, <vscale x 32 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv32bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vcompress.vm v16, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v16
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 32 x bfloat> @llvm.experimental.vector.compress.nxv32bf16(<vscale x 32 x bfloat> %data, <vscale x 32 x i1> %mask, <vscale x 32 x bfloat> undef)
+  ret <vscale x 32 x bfloat> %ret
+}
+
+define <vscale x 32 x bfloat> @vector_compress_nxv32bf16_passthru(<vscale x 32 x bfloat> %passthru, <vscale x 32 x bfloat> %data, <vscale x 32 x i1> %mask) {
+; CHECK-LABEL: vector_compress_nxv32bf16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m8, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v16, v0
+; CHECK-NEXT:    ret
+  %ret = call <vscale x 32 x bfloat> @llvm.experimental.vector.compress.nxv32bf16(<vscale x 32 x bfloat> %data, <vscale x 32 x i1> %mask, <vscale x 32 x bfloat> %passthru)
+  ret <vscale x 32 x bfloat> %ret
+}
+
 ; Vector compress for f16 type
 
 define <vscale x 1 x half> @vector_compress_nxv1f16(<vscale x 1 x half> %data, <vscale x 1 x i1> %mask) {
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll
new file mode 100644
index 000000000000000..6955411a0e4e99b
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll
@@ -0,0 +1,14 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: OpMemoryModel Logical GLSL450
+
+define void @test_group_memory_barrier_with_group_sync() {
+entry:
+  ; CHECK: %[[#TY:]] = OpTypeInt 32 0
+  ; CHECK-DAG: %[[#MEM_SEM:]] = OpConstant %[[#TY]] 16
+  ; CHECK-DAG: %[[#EXEC_AND_MEM_SCOPE:]] = OpConstant %[[#TY]] 2
+  ; CHECK: OpControlBarrier %[[#EXEC_AND_MEM_SCOPE]] %[[#EXEC_AND_MEM_SCOPE]] %[[#MEM_SEM]]
+  call void @llvm.spv.group.memory.barrier.with.group.sync()
+  ret void
+}
diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll
new file mode 100644
index 000000000000000..d18b16b843c37bc
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll
@@ -0,0 +1,40 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; Make sure lowering is correctly generating spirv code.
+
+; CHECK-DAG: %[[#double:]] = OpTypeFloat 64
+; CHECK-DAG: %[[#vec_2_double:]] = OpTypeVector %[[#double]] 2
+; CHECK-DAG: %[[#int_32:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#vec_2_int_32:]] = OpTypeVector %[[#int_32]] 2
+; CHECK-DAG: %[[#vec_4_int_32:]] = OpTypeVector %[[#int_32]] 4
+
+
+define spir_func noundef i32 @test_scalar(double noundef %D) local_unnamed_addr {
+entry:
+  ; CHECK-LABEL: ; -- Begin function test_scalar
+  ; CHECK: %[[#param:]] = OpFunctionParameter %[[#double]]
+  ; CHECK: %[[#bitcast:]] = OpBitcast %[[#vec_2_int_32]] %[[#param]]
+  %0 = bitcast double %D to <2 x i32>
+  ; CHECK: %[[#]] = OpCompositeExtract %[[#int_32]] %[[#bitcast]] 0
+  %1 = extractelement <2 x i32> %0, i64 0
+  ; CHECK: %[[#]] = OpCompositeExtract %[[#int_32]] %[[#bitcast]] 1
+  %2 = extractelement <2 x i32> %0, i64 1
+  %add = add i32 %1, %2
+  ret i32 %add
+}
+
+
+define spir_func noundef <2 x i32> @test_vector(<2 x double> noundef %D) local_unnamed_addr {
+entry:
+  ; CHECK-LABEL: ; -- Begin function test_vector
+  ; CHECK: %[[#param:]] = OpFunctionParameter %[[#vec_2_double]]
+  ; CHECK: %[[#CAST1:]] = OpBitcast %[[#vec_4_int_32]] %[[#param]]
+  ; CHECK: %[[#SHUFF2:]] = OpVectorShuffle %[[#vec_2_int_32]] %[[#CAST1]] %[[#]] 0 2
+  ; CHECK: %[[#SHUFF3:]] = OpVectorShuffle %[[#vec_2_int_32]] %[[#CAST1]] %[[#]] 1 3
+  %0 = bitcast <2 x double> %D to <4 x i32>
+  %1 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
+  %2 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
+  %add = add <2 x i32> %1, %2
+  ret <2 x i32> %add
+}
diff --git a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
index c309d992b95a5e4..a0e6f9bf9b30d9b 100644
--- a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
+++ b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll
@@ -1,9 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 ; RUN: llc --filetype=obj %s -o - | llvm-readelf -u - | FileCheck %s --check-prefix=UNWIND
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-unknown-eabi"
 
+; Check the function starts with `pacbti` and correct unwind info is emitted
 define hidden i32 @_Z1fi(i32 %x) "sign-return-address"="non-leaf" "sign-return-address-key"="a_key" "branch-target-enforcement" {
+; CHECK-LABEL: _Z1fi:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pacbti r12, lr, sp
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r7, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r7, -12
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    adds r0, #1
+; CHECK-NEXT:    bl _Z1gi
+; CHECK-NEXT:    subs r0, #1
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r7, r12, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %add = add nsw i32 %x, 1
   %call = tail call i32 @_Z1gi(i32 %add)
@@ -13,26 +36,10 @@ entry:
 
 declare dso_local i32 @_Z1gi(i32)
 
-; Check the function starts with `pacbti` and correct unwind info is emitted
-; CHECK-LABEL: _Z1fi:
-; ...
-; CHECK:       pacbti   r12, lr, sp
-; CHECK-NEXT:  .save    {r7, lr}
-; CHECK-NEXT:  push     {r7, lr}
-; CHECK-NEXT:  .cfi_def_cfa_offset 8
-; CHECK-NEXT:  .cfi_offset lr, -4
-; CHECK-NEXT:  .cfi_offset r7, -8
-; CHECK-NEXT:  .save   {ra_auth_code}
-; CHECK-NEXT:  str     r12, [sp, #-4]!
-; CHECK-NEXT:  .cfi_def_cfa_offset 12
-; CHECK-NEXT:  .cfi_offset ra_auth_code, -12
-; CHECK-NEXT:  .pad    #4
-; CHECK-NEXT:  sub     sp, #4
-; CHECK-NEXT:  .cfi_def_cfa_offset 16
-; ...
-
 ; UNWIND-LABEL: Opcodes [
 ; UNWIND-NEXT:  0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0x80 0x08 ; pop {r7}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0x84 0x08 ; pop {r7, lr}
-; UNWIND-NEXT:  0xB0      ; finish
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
+
+
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
index 0ae46cb8879ee0e..31f8ecddcb986c5 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 ; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -16,6 +17,27 @@ target triple = "thumbv8m.main-none-none-eabi"
 ; }
 
 define hidden i32 @f0(i32 %x) local_unnamed_addr "sign-return-address"="non-leaf" {
+; CHECK-LABEL: f0:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r7, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r7, -12
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    subs r0, #1
+; CHECK-NEXT:    bl g
+; CHECK-NEXT:    adds r0, #1
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r7, r12, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %sub = add nsw i32 %x, -1
   %call = tail call i32 @g(i32 %sub)
@@ -23,27 +45,28 @@ entry:
   ret i32 %add
 }
 
-; CHECK-LABEL: f0:
-; CHECK:       pac     r12, lr, sp
-; CHECK-NEXT:  .save   {r7, lr}
-; CHECK-NEXT:  push    {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .save   {ra_auth_code}
-; CHECK-NEXT:  str     r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -12
-; CHECK-NEXT: .pad    #4
-; CHECK-NEXT:  sub     sp, #4
-; ...
-; CHECK:       add     sp, #4
-; CHECK-NEXT:  ldr     r12, [sp], #4
-; CHECK-NEXT:  pop.w   {r7, lr}
-; CHECK-NEXT:  aut     r12, lr, sp
-; CHECK-NEXT:  bx      lr
-
 define hidden i32 @f1(i32 %x) local_unnamed_addr #0 {
+; CHECK-LABEL: f1:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    vstr fpcxtns, [sp, #-4]!
+; CHECK-NEXT:    .cfi_def_cfa_offset 4
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r7, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset lr, -8
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -12
+; CHECK-NEXT:    .cfi_offset r7, -16
+; CHECK-NEXT:    subs r0, #1
+; CHECK-NEXT:    bl g
+; CHECK-NEXT:    adds r0, #1
+; CHECK-NEXT:    pop.w {r7, r12, lr}
+; CHECK-NEXT:    vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-NEXT:    vldr fpcxtns, [sp], #4
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    clrm {r1, r2, r3, r12, apsr}
+; CHECK-NEXT:    bxns lr
 entry:
   %sub = add nsw i32 %x, -1
   %call = tail call i32 @g(i32 %sub)
@@ -51,44 +74,55 @@ entry:
   ret i32 %add
 }
 
-; CHECK-LABEL: f1:
-; CHECK:       pac     r12, lr, sp
-; CHECK-NEXT:  vstr    fpcxtns, [sp, #-4]!
-; CHECK-NEXT:  .cfi_def_cfa_offset 4
-; CHECK-NEXT:  .save    {r7, lr}
-; CHECK-NEXT:  push    {r7, lr}
-; CHECK:       vldr    fpcxtns, [sp], #4
-; CHECK:       aut     r12, lr, sp
-
 define hidden i32 @f2(i32 %x) local_unnamed_addr #1 {
+; CHECK-LABEL: f2:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r7, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r7, -12
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    subs r0, #1
+; CHECK-NEXT:    bl g
+; CHECK-NEXT:    adds r0, #1
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r7, r12, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    mrs r12, control
+; CHECK-NEXT:    tst.w r12, #8
+; CHECK-NEXT:    beq .LBB2_2
+; CHECK-NEXT:  @ %bb.1: @ %entry
+; CHECK-NEXT:    vmrs r12, fpscr
+; CHECK-NEXT:    vmov d0, lr, lr
+; CHECK-NEXT:    vmov d1, lr, lr
+; CHECK-NEXT:    vmov d2, lr, lr
+; CHECK-NEXT:    vmov d3, lr, lr
+; CHECK-NEXT:    vmov d4, lr, lr
+; CHECK-NEXT:    vmov d5, lr, lr
+; CHECK-NEXT:    vmov d6, lr, lr
+; CHECK-NEXT:    vmov d7, lr, lr
+; CHECK-NEXT:    bic r12, r12, #159
+; CHECK-NEXT:    bic r12, r12, #4026531840
+; CHECK-NEXT:    vmsr fpscr, r12
+; CHECK-NEXT:  .LBB2_2: @ %entry
+; CHECK-NEXT:    mov r1, lr
+; CHECK-NEXT:    mov r2, lr
+; CHECK-NEXT:    mov r3, lr
+; CHECK-NEXT:    mov r12, lr
+; CHECK-NEXT:    msr apsr_nzcvq, lr
+; CHECK-NEXT:    bxns lr
 entry:
   %sub = add nsw i32 %x, -1
   %call = tail call i32 @g(i32 %sub)
   %add = add nsw i32 %call, 1
   ret i32 %add
 }
-; CHECK-LABEL: f2:
-; CHECK:       pac    r12, lr, sp
-; CHECK-NEXT:  .save  {r7, lr}
-; CHECK-NEXT:  push   {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 8
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT:  .save  {ra_auth_code}
-; CHECK-NEXT:  str    r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT: .cfi_offset ra_auth_code, -12
-; CHECK-NEXT:  .pad   #4
-; CHECK-NEXT:  sub    sp, #4
-; CHECK-NEXT:  .cfi_def_cfa_offset 16
-; ...
-; CHECK:       add    sp, #4
-; CHECK-NEXT:  ldr    r12, [sp], #4
-; CHECK-NEXT:  pop.w  {r7, lr}
-; CHECK-NEXT:  aut    r12, lr, sp
-; CHECK-NEXT:  mrs    r12, control
-; ...
-; CHECK:       bxns    lr
 
 declare dso_local i32 @g(i32) local_unnamed_addr
 
@@ -103,22 +137,22 @@ attributes #1 = { "sign-return-address"="non-leaf" "cmse_nonsecure_entry" "targe
 
 ; UNWIND-LABEL: FunctionAddress: 0x0
 ; UNWIND:       0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0x80 0x08 ; pop {r7}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0x84 0x08 ; pop {r7, lr}
-; UNWIND-NEXT:  0xB0      ; finish
-; UNWIND-NEXT:  0xB0      ; finish
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: FunctionAddress: 0x24
-; UNWIND:       0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0x84 0x08 ; pop {r7, lr}
 
-; UNWIND-LABEL: FunctionAddress: 0x54
+; UNWIND-LABEL: FunctionAddress: 0x1E
+; UNWIND:       0x80 0x08 ; pop {r7}
+; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
+
+; UNWIND-LABEL: FunctionAddress: 0x48
 ; UNWIND:       0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0x80 0x08 ; pop {r7}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0x84 0x08 ; pop {r7, lr}
-; UNWIND-NEXT:  0xB0      ; finish
-; UNWIND-NEXT:  0xB0      ; finish
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
 ; UNWIND-LABEL: 00000001 {{.*}} f0
-; UNWIND-LABEL: 00000025 {{.*}} f1
-; UNWIND-LABEL: 00000055 {{.*}} f2
+; UNWIND-LABEL: 0000001f {{.*}} f1
+; UNWIND-LABEL: 00000049 {{.*}} f2
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
index 8bcf87130c54008..7e9258002f61598 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll
@@ -1,32 +1,87 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=none                          | FileCheck %s --check-prefix=R7
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=reserved                      | FileCheck %s --check-prefix=R7-RES
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all                           | FileCheck %s --check-prefix=R7-ABI
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=none     -mattr=+aapcs-frame-chain | FileCheck %s --check-prefix=R11
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=reserved -mattr=+aapcs-frame-chain | FileCheck %s --check-prefix=R11-RES
+; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all      -mattr=+aapcs-frame-chain | FileCheck %s --check-prefix=R11-ABI
 
 ; int test1() {
 ;     return 0;
 ; }
 define i32 @test1() "sign-return-address"="non-leaf" {
-; CHECK-LABEL: test1:
-; CHECK:         .cfi_sections .debug_frame
-; CHECK-NEXT:    .cfi_startproc
-; CHECK-NEXT:  @ %bb.0: @ %entry
-; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 4
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -4
-; CHECK-NEXT:    .save {r11, lr}
-; CHECK-NEXT:    push.w {r11, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .cfi_offset lr, -8
-; CHECK-NEXT:    .cfi_offset r11, -12
-; CHECK-NEXT:    .setfp r11, sp
-; CHECK-NEXT:    mov r11, sp
-; CHECK-NEXT:    .cfi_def_cfa_register r11
-; CHECK-NEXT:    movs r0, #0
-; CHECK-NEXT:    pop.w {r11, lr}
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    aut r12, lr, sp
-; CHECK-NEXT:    bx lr
+; R7-LABEL: test1:
+; R7:         .cfi_sections .debug_frame
+; R7-NEXT:    .cfi_startproc
+; R7-NEXT:  @ %bb.0: @ %entry
+; R7-NEXT:    movs r0, #0
+; R7-NEXT:    bx lr
+;
+; R7-RES-LABEL: test1:
+; R7-RES:         .cfi_sections .debug_frame
+; R7-RES-NEXT:    .cfi_startproc
+; R7-RES-NEXT:  @ %bb.0: @ %entry
+; R7-RES-NEXT:    movs r0, #0
+; R7-RES-NEXT:    bx lr
+;
+; R7-ABI-LABEL: test1:
+; R7-ABI:         .cfi_sections .debug_frame
+; R7-ABI-NEXT:    .cfi_startproc
+; R7-ABI-NEXT:  @ %bb.0: @ %entry
+; R7-ABI-NEXT:    pac r12, lr, sp
+; R7-ABI-NEXT:    .save {r7, lr}
+; R7-ABI-NEXT:    push {r7, lr}
+; R7-ABI-NEXT:    .cfi_def_cfa_offset 8
+; R7-ABI-NEXT:    .cfi_offset lr, -4
+; R7-ABI-NEXT:    .cfi_offset r7, -8
+; R7-ABI-NEXT:    .setfp r7, sp
+; R7-ABI-NEXT:    mov r7, sp
+; R7-ABI-NEXT:    .cfi_def_cfa_register r7
+; R7-ABI-NEXT:    .save {ra_auth_code}
+; R7-ABI-NEXT:    str r12, [sp, #-4]!
+; R7-ABI-NEXT:    .cfi_offset ra_auth_code, -12
+; R7-ABI-NEXT:    movs r0, #0
+; R7-ABI-NEXT:    ldr r12, [sp], #4
+; R7-ABI-NEXT:    pop.w {r7, lr}
+; R7-ABI-NEXT:    aut r12, lr, sp
+; R7-ABI-NEXT:    bx lr
+;
+; R11-LABEL: test1:
+; R11:         .cfi_sections .debug_frame
+; R11-NEXT:    .cfi_startproc
+; R11-NEXT:  @ %bb.0: @ %entry
+; R11-NEXT:    movs r0, #0
+; R11-NEXT:    bx lr
+;
+; R11-RES-LABEL: test1:
+; R11-RES:         .cfi_sections .debug_frame
+; R11-RES-NEXT:    .cfi_startproc
+; R11-RES-NEXT:  @ %bb.0: @ %entry
+; R11-RES-NEXT:    movs r0, #0
+; R11-RES-NEXT:    bx lr
+;
+; R11-ABI-LABEL: test1:
+; R11-ABI:         .cfi_sections .debug_frame
+; R11-ABI-NEXT:    .cfi_startproc
+; R11-ABI-NEXT:  @ %bb.0: @ %entry
+; R11-ABI-NEXT:    pac r12, lr, sp
+; R11-ABI-NEXT:    .save {ra_auth_code}
+; R11-ABI-NEXT:    str r12, [sp, #-4]!
+; R11-ABI-NEXT:    .cfi_def_cfa_offset 4
+; R11-ABI-NEXT:    .cfi_offset ra_auth_code, -4
+; R11-ABI-NEXT:    .save {r11, lr}
+; R11-ABI-NEXT:    push.w {r11, lr}
+; R11-ABI-NEXT:    .cfi_def_cfa_offset 12
+; R11-ABI-NEXT:    .cfi_offset lr, -8
+; R11-ABI-NEXT:    .cfi_offset r11, -12
+; R11-ABI-NEXT:    .setfp r11, sp
+; R11-ABI-NEXT:    mov r11, sp
+; R11-ABI-NEXT:    .cfi_def_cfa_register r11
+; R11-ABI-NEXT:    movs r0, #0
+; R11-ABI-NEXT:    pop.w {r11, lr}
+; R11-ABI-NEXT:    ldr r12, [sp], #4
+; R11-ABI-NEXT:    aut r12, lr, sp
+; R11-ABI-NEXT:    bx lr
 entry:
     ret i32 0
 }
@@ -36,37 +91,191 @@ entry:
 ;   bar(a);
 ; }
 define dso_local void @test2(i32 noundef %n) "sign-return-address"="non-leaf" {
-; CHECK-LABEL: test2:
-; CHECK:         .cfi_startproc
-; CHECK-NEXT:  @ %bb.0: @ %entry
-; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r4, r7, ra_auth_code}
-; CHECK-NEXT:    push.w {r4, r7, r12}
-; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -4
-; CHECK-NEXT:    .cfi_offset r7, -8
-; CHECK-NEXT:    .cfi_offset r4, -12
-; CHECK-NEXT:    .save {r11, lr}
-; CHECK-NEXT:    push.w {r11, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
-; CHECK-NEXT:    .cfi_offset lr, -16
-; CHECK-NEXT:    .cfi_offset r11, -20
-; CHECK-NEXT:    .setfp r11, sp
-; CHECK-NEXT:    mov r11, sp
-; CHECK-NEXT:    .cfi_def_cfa_register r11
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
-; CHECK-NEXT:    movs r1, #7
-; CHECK-NEXT:    add.w r0, r1, r0, lsl #2
-; CHECK-NEXT:    bic r0, r0, #7
-; CHECK-NEXT:    sub.w r0, sp, r0
-; CHECK-NEXT:    mov sp, r0
-; CHECK-NEXT:    bl take_ptr
-; CHECK-NEXT:    mov sp, r11
-; CHECK-NEXT:    pop.w {r11, lr}
-; CHECK-NEXT:    pop.w {r4, r7, r12}
-; CHECK-NEXT:    aut r12, lr, sp
-; CHECK-NEXT:    bx lr
+; R7-LABEL: test2:
+; R7:         .cfi_startproc
+; R7-NEXT:  @ %bb.0: @ %entry
+; R7-NEXT:    pac r12, lr, sp
+; R7-NEXT:    .save {r4, r6, r7, ra_auth_code, lr}
+; R7-NEXT:    push.w {r4, r6, r7, r12, lr}
+; R7-NEXT:    .cfi_def_cfa_offset 20
+; R7-NEXT:    .cfi_offset lr, -4
+; R7-NEXT:    .cfi_offset ra_auth_code, -8
+; R7-NEXT:    .cfi_offset r7, -12
+; R7-NEXT:    .cfi_offset r6, -16
+; R7-NEXT:    .cfi_offset r4, -20
+; R7-NEXT:    .setfp r7, sp, #8
+; R7-NEXT:    add r7, sp, #8
+; R7-NEXT:    .cfi_def_cfa r7, 12
+; R7-NEXT:    .pad #4
+; R7-NEXT:    sub sp, #4
+; R7-NEXT:    movs r1, #7
+; R7-NEXT:    add.w r0, r1, r0, lsl #2
+; R7-NEXT:    bic r0, r0, #7
+; R7-NEXT:    sub.w r0, sp, r0
+; R7-NEXT:    mov sp, r0
+; R7-NEXT:    bl take_ptr
+; R7-NEXT:    sub.w r4, r7, #8
+; R7-NEXT:    mov sp, r4
+; R7-NEXT:    pop.w {r4, r6, r7, r12, lr}
+; R7-NEXT:    aut r12, lr, sp
+; R7-NEXT:    bx lr
+;
+; R7-RES-LABEL: test2:
+; R7-RES:         .cfi_startproc
+; R7-RES-NEXT:  @ %bb.0: @ %entry
+; R7-RES-NEXT:    pac r12, lr, sp
+; R7-RES-NEXT:    .save {r4, r6, r7, lr}
+; R7-RES-NEXT:    push {r4, r6, r7, lr}
+; R7-RES-NEXT:    .cfi_def_cfa_offset 16
+; R7-RES-NEXT:    .cfi_offset lr, -4
+; R7-RES-NEXT:    .cfi_offset r7, -8
+; R7-RES-NEXT:    .cfi_offset r6, -12
+; R7-RES-NEXT:    .cfi_offset r4, -16
+; R7-RES-NEXT:    .setfp r7, sp, #8
+; R7-RES-NEXT:    add r7, sp, #8
+; R7-RES-NEXT:    .cfi_def_cfa r7, 8
+; R7-RES-NEXT:    .save {ra_auth_code}
+; R7-RES-NEXT:    str r12, [sp, #-4]!
+; R7-RES-NEXT:    .cfi_offset ra_auth_code, -20
+; R7-RES-NEXT:    .pad #4
+; R7-RES-NEXT:    sub sp, #4
+; R7-RES-NEXT:    movs r1, #7
+; R7-RES-NEXT:    add.w r0, r1, r0, lsl #2
+; R7-RES-NEXT:    bic r0, r0, #7
+; R7-RES-NEXT:    sub.w r0, sp, r0
+; R7-RES-NEXT:    mov sp, r0
+; R7-RES-NEXT:    bl take_ptr
+; R7-RES-NEXT:    sub.w r4, r7, #12
+; R7-RES-NEXT:    mov sp, r4
+; R7-RES-NEXT:    ldr r12, [sp], #4
+; R7-RES-NEXT:    pop.w {r4, r6, r7, lr}
+; R7-RES-NEXT:    aut r12, lr, sp
+; R7-RES-NEXT:    bx lr
+;
+; R7-ABI-LABEL: test2:
+; R7-ABI:         .cfi_startproc
+; R7-ABI-NEXT:  @ %bb.0: @ %entry
+; R7-ABI-NEXT:    pac r12, lr, sp
+; R7-ABI-NEXT:    .save {r4, r6, r7, lr}
+; R7-ABI-NEXT:    push {r4, r6, r7, lr}
+; R7-ABI-NEXT:    .cfi_def_cfa_offset 16
+; R7-ABI-NEXT:    .cfi_offset lr, -4
+; R7-ABI-NEXT:    .cfi_offset r7, -8
+; R7-ABI-NEXT:    .cfi_offset r6, -12
+; R7-ABI-NEXT:    .cfi_offset r4, -16
+; R7-ABI-NEXT:    .setfp r7, sp, #8
+; R7-ABI-NEXT:    add r7, sp, #8
+; R7-ABI-NEXT:    .cfi_def_cfa r7, 8
+; R7-ABI-NEXT:    .save {ra_auth_code}
+; R7-ABI-NEXT:    str r12, [sp, #-4]!
+; R7-ABI-NEXT:    .cfi_offset ra_auth_code, -20
+; R7-ABI-NEXT:    .pad #4
+; R7-ABI-NEXT:    sub sp, #4
+; R7-ABI-NEXT:    movs r1, #7
+; R7-ABI-NEXT:    add.w r0, r1, r0, lsl #2
+; R7-ABI-NEXT:    bic r0, r0, #7
+; R7-ABI-NEXT:    sub.w r0, sp, r0
+; R7-ABI-NEXT:    mov sp, r0
+; R7-ABI-NEXT:    bl take_ptr
+; R7-ABI-NEXT:    sub.w r4, r7, #12
+; R7-ABI-NEXT:    mov sp, r4
+; R7-ABI-NEXT:    ldr r12, [sp], #4
+; R7-ABI-NEXT:    pop.w {r4, r6, r7, lr}
+; R7-ABI-NEXT:    aut r12, lr, sp
+; R7-ABI-NEXT:    bx lr
+;
+; R11-LABEL: test2:
+; R11:         .cfi_startproc
+; R11-NEXT:  @ %bb.0: @ %entry
+; R11-NEXT:    pac r12, lr, sp
+; R11-NEXT:    .save {r4, r7, r11, ra_auth_code, lr}
+; R11-NEXT:    push.w {r4, r7, r11, r12, lr}
+; R11-NEXT:    .cfi_def_cfa_offset 20
+; R11-NEXT:    .cfi_offset lr, -4
+; R11-NEXT:    .cfi_offset ra_auth_code, -8
+; R11-NEXT:    .cfi_offset r11, -12
+; R11-NEXT:    .cfi_offset r7, -16
+; R11-NEXT:    .cfi_offset r4, -20
+; R11-NEXT:    .setfp r11, sp, #8
+; R11-NEXT:    add.w r11, sp, #8
+; R11-NEXT:    .cfi_def_cfa r11, 12
+; R11-NEXT:    .pad #4
+; R11-NEXT:    sub sp, #4
+; R11-NEXT:    movs r1, #7
+; R11-NEXT:    add.w r0, r1, r0, lsl #2
+; R11-NEXT:    bic r0, r0, #7
+; R11-NEXT:    sub.w r0, sp, r0
+; R11-NEXT:    mov sp, r0
+; R11-NEXT:    bl take_ptr
+; R11-NEXT:    sub.w r4, r11, #8
+; R11-NEXT:    mov sp, r4
+; R11-NEXT:    pop.w {r4, r7, r11, r12, lr}
+; R11-NEXT:    aut r12, lr, sp
+; R11-NEXT:    bx lr
+;
+; R11-RES-LABEL: test2:
+; R11-RES:         .cfi_startproc
+; R11-RES-NEXT:  @ %bb.0: @ %entry
+; R11-RES-NEXT:    pac r12, lr, sp
+; R11-RES-NEXT:    .save {r4, r7, ra_auth_code}
+; R11-RES-NEXT:    push.w {r4, r7, r12}
+; R11-RES-NEXT:    .cfi_def_cfa_offset 12
+; R11-RES-NEXT:    .cfi_offset ra_auth_code, -4
+; R11-RES-NEXT:    .cfi_offset r7, -8
+; R11-RES-NEXT:    .cfi_offset r4, -12
+; R11-RES-NEXT:    .save {r11, lr}
+; R11-RES-NEXT:    push.w {r11, lr}
+; R11-RES-NEXT:    .cfi_def_cfa_offset 20
+; R11-RES-NEXT:    .cfi_offset lr, -16
+; R11-RES-NEXT:    .cfi_offset r11, -20
+; R11-RES-NEXT:    .setfp r11, sp
+; R11-RES-NEXT:    mov r11, sp
+; R11-RES-NEXT:    .cfi_def_cfa_register r11
+; R11-RES-NEXT:    .pad #4
+; R11-RES-NEXT:    sub sp, #4
+; R11-RES-NEXT:    movs r1, #7
+; R11-RES-NEXT:    add.w r0, r1, r0, lsl #2
+; R11-RES-NEXT:    bic r0, r0, #7
+; R11-RES-NEXT:    sub.w r0, sp, r0
+; R11-RES-NEXT:    mov sp, r0
+; R11-RES-NEXT:    bl take_ptr
+; R11-RES-NEXT:    mov sp, r11
+; R11-RES-NEXT:    pop.w {r11, lr}
+; R11-RES-NEXT:    pop.w {r4, r7, r12}
+; R11-RES-NEXT:    aut r12, lr, sp
+; R11-RES-NEXT:    bx lr
+;
+; R11-ABI-LABEL: test2:
+; R11-ABI:         .cfi_startproc
+; R11-ABI-NEXT:  @ %bb.0: @ %entry
+; R11-ABI-NEXT:    pac r12, lr, sp
+; R11-ABI-NEXT:    .save {r4, r7, ra_auth_code}
+; R11-ABI-NEXT:    push.w {r4, r7, r12}
+; R11-ABI-NEXT:    .cfi_def_cfa_offset 12
+; R11-ABI-NEXT:    .cfi_offset ra_auth_code, -4
+; R11-ABI-NEXT:    .cfi_offset r7, -8
+; R11-ABI-NEXT:    .cfi_offset r4, -12
+; R11-ABI-NEXT:    .save {r11, lr}
+; R11-ABI-NEXT:    push.w {r11, lr}
+; R11-ABI-NEXT:    .cfi_def_cfa_offset 20
+; R11-ABI-NEXT:    .cfi_offset lr, -16
+; R11-ABI-NEXT:    .cfi_offset r11, -20
+; R11-ABI-NEXT:    .setfp r11, sp
+; R11-ABI-NEXT:    mov r11, sp
+; R11-ABI-NEXT:    .cfi_def_cfa_register r11
+; R11-ABI-NEXT:    .pad #4
+; R11-ABI-NEXT:    sub sp, #4
+; R11-ABI-NEXT:    movs r1, #7
+; R11-ABI-NEXT:    add.w r0, r1, r0, lsl #2
+; R11-ABI-NEXT:    bic r0, r0, #7
+; R11-ABI-NEXT:    sub.w r0, sp, r0
+; R11-ABI-NEXT:    mov sp, r0
+; R11-ABI-NEXT:    bl take_ptr
+; R11-ABI-NEXT:    mov sp, r11
+; R11-ABI-NEXT:    pop.w {r11, lr}
+; R11-ABI-NEXT:    pop.w {r4, r7, r12}
+; R11-ABI-NEXT:    aut r12, lr, sp
+; R11-ABI-NEXT:    bx lr
 entry:
   %vla = alloca i32, i32 %n, align 4
   call void @take_ptr(ptr noundef nonnull %vla)
@@ -81,49 +290,263 @@ entry:
 ;     knr();
 ; }
 define void @test3(i32 noundef %c, float noundef %e, i32 noundef %z) "sign-return-address"="non-leaf" {
-; CHECK-LABEL: test3:
-; CHECK:         .cfi_startproc
-; CHECK-NEXT:  @ %bb.0: @ %entry
-; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r4, r5, r6, r7, ra_auth_code}
-; CHECK-NEXT:    push.w {r4, r5, r6, r7, r12}
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -4
-; CHECK-NEXT:    .cfi_offset r7, -8
-; CHECK-NEXT:    .cfi_offset r6, -12
-; CHECK-NEXT:    .cfi_offset r5, -16
-; CHECK-NEXT:    .cfi_offset r4, -20
-; CHECK-NEXT:    .save {r11, lr}
-; CHECK-NEXT:    push.w {r11, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 28
-; CHECK-NEXT:    .cfi_offset lr, -24
-; CHECK-NEXT:    .cfi_offset r11, -28
-; CHECK-NEXT:    .setfp r11, sp
-; CHECK-NEXT:    mov r11, sp
-; CHECK-NEXT:    .cfi_def_cfa_register r11
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    mov r5, r2
-; CHECK-NEXT:    mov r4, r1
-; CHECK-NEXT:    it ne
-; CHECK-NEXT:    blne knr
-; CHECK-NEXT:    adds r0, r5, #7
-; CHECK-NEXT:    bic r0, r0, #7
-; CHECK-NEXT:    sub.w r0, sp, r0
-; CHECK-NEXT:    mov sp, r0
-; CHECK-NEXT:    bl take_ptr
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    movs r1, #0
-; CHECK-NEXT:    bl __aeabi_fcmpeq
-; CHECK-NEXT:    cmp r0, #0
-; CHECK-NEXT:    it eq
-; CHECK-NEXT:    bleq knr
-; CHECK-NEXT:    mov sp, r11
-; CHECK-NEXT:    pop.w {r11, lr}
-; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r12}
-; CHECK-NEXT:    aut r12, lr, sp
-; CHECK-NEXT:    bx lr
+; R7-LABEL: test3:
+; R7:         .cfi_startproc
+; R7-NEXT:  @ %bb.0: @ %entry
+; R7-NEXT:    pac r12, lr, sp
+; R7-NEXT:    .save {r4, r5, r6, r7, r8, ra_auth_code, lr}
+; R7-NEXT:    push.w {r4, r5, r6, r7, r8, r12, lr}
+; R7-NEXT:    .cfi_def_cfa_offset 28
+; R7-NEXT:    .cfi_offset lr, -4
+; R7-NEXT:    .cfi_offset ra_auth_code, -8
+; R7-NEXT:    .cfi_offset r8, -12
+; R7-NEXT:    .cfi_offset r7, -16
+; R7-NEXT:    .cfi_offset r6, -20
+; R7-NEXT:    .cfi_offset r5, -24
+; R7-NEXT:    .cfi_offset r4, -28
+; R7-NEXT:    .setfp r7, sp, #12
+; R7-NEXT:    add r7, sp, #12
+; R7-NEXT:    .cfi_def_cfa r7, 16
+; R7-NEXT:    .pad #4
+; R7-NEXT:    sub sp, #4
+; R7-NEXT:    cmp r0, #0
+; R7-NEXT:    mov r5, r2
+; R7-NEXT:    mov r4, r1
+; R7-NEXT:    it ne
+; R7-NEXT:    blne knr
+; R7-NEXT:    adds r0, r5, #7
+; R7-NEXT:    bic r0, r0, #7
+; R7-NEXT:    sub.w r0, sp, r0
+; R7-NEXT:    mov sp, r0
+; R7-NEXT:    bl take_ptr
+; R7-NEXT:    mov r0, r4
+; R7-NEXT:    movs r1, #0
+; R7-NEXT:    bl __aeabi_fcmpeq
+; R7-NEXT:    cmp r0, #0
+; R7-NEXT:    it eq
+; R7-NEXT:    bleq knr
+; R7-NEXT:    sub.w r4, r7, #12
+; R7-NEXT:    mov sp, r4
+; R7-NEXT:    pop.w {r4, r5, r6, r7, r8, r12, lr}
+; R7-NEXT:    aut r12, lr, sp
+; R7-NEXT:    bx lr
+;
+; R7-RES-LABEL: test3:
+; R7-RES:         .cfi_startproc
+; R7-RES-NEXT:  @ %bb.0: @ %entry
+; R7-RES-NEXT:    pac r12, lr, sp
+; R7-RES-NEXT:    .save {r4, r5, r6, r7, lr}
+; R7-RES-NEXT:    push {r4, r5, r6, r7, lr}
+; R7-RES-NEXT:    .cfi_def_cfa_offset 20
+; R7-RES-NEXT:    .cfi_offset lr, -4
+; R7-RES-NEXT:    .cfi_offset r7, -8
+; R7-RES-NEXT:    .cfi_offset r6, -12
+; R7-RES-NEXT:    .cfi_offset r5, -16
+; R7-RES-NEXT:    .cfi_offset r4, -20
+; R7-RES-NEXT:    .setfp r7, sp, #12
+; R7-RES-NEXT:    add r7, sp, #12
+; R7-RES-NEXT:    .cfi_def_cfa r7, 8
+; R7-RES-NEXT:    .save {r8, ra_auth_code}
+; R7-RES-NEXT:    push.w {r8, r12}
+; R7-RES-NEXT:    .cfi_offset ra_auth_code, -24
+; R7-RES-NEXT:    .cfi_offset r8, -28
+; R7-RES-NEXT:    .pad #4
+; R7-RES-NEXT:    sub sp, #4
+; R7-RES-NEXT:    cmp r0, #0
+; R7-RES-NEXT:    mov r5, r2
+; R7-RES-NEXT:    mov r4, r1
+; R7-RES-NEXT:    it ne
+; R7-RES-NEXT:    blne knr
+; R7-RES-NEXT:    adds r0, r5, #7
+; R7-RES-NEXT:    bic r0, r0, #7
+; R7-RES-NEXT:    sub.w r0, sp, r0
+; R7-RES-NEXT:    mov sp, r0
+; R7-RES-NEXT:    bl take_ptr
+; R7-RES-NEXT:    mov r0, r4
+; R7-RES-NEXT:    movs r1, #0
+; R7-RES-NEXT:    bl __aeabi_fcmpeq
+; R7-RES-NEXT:    cmp r0, #0
+; R7-RES-NEXT:    it eq
+; R7-RES-NEXT:    bleq knr
+; R7-RES-NEXT:    sub.w r4, r7, #20
+; R7-RES-NEXT:    mov sp, r4
+; R7-RES-NEXT:    pop.w {r8, r12}
+; R7-RES-NEXT:    pop.w {r4, r5, r6, r7, lr}
+; R7-RES-NEXT:    aut r12, lr, sp
+; R7-RES-NEXT:    bx lr
+;
+; R7-ABI-LABEL: test3:
+; R7-ABI:         .cfi_startproc
+; R7-ABI-NEXT:  @ %bb.0: @ %entry
+; R7-ABI-NEXT:    pac r12, lr, sp
+; R7-ABI-NEXT:    .save {r4, r5, r6, r7, lr}
+; R7-ABI-NEXT:    push {r4, r5, r6, r7, lr}
+; R7-ABI-NEXT:    .cfi_def_cfa_offset 20
+; R7-ABI-NEXT:    .cfi_offset lr, -4
+; R7-ABI-NEXT:    .cfi_offset r7, -8
+; R7-ABI-NEXT:    .cfi_offset r6, -12
+; R7-ABI-NEXT:    .cfi_offset r5, -16
+; R7-ABI-NEXT:    .cfi_offset r4, -20
+; R7-ABI-NEXT:    .setfp r7, sp, #12
+; R7-ABI-NEXT:    add r7, sp, #12
+; R7-ABI-NEXT:    .cfi_def_cfa r7, 8
+; R7-ABI-NEXT:    .save {r8, ra_auth_code}
+; R7-ABI-NEXT:    push.w {r8, r12}
+; R7-ABI-NEXT:    .cfi_offset ra_auth_code, -24
+; R7-ABI-NEXT:    .cfi_offset r8, -28
+; R7-ABI-NEXT:    .pad #4
+; R7-ABI-NEXT:    sub sp, #4
+; R7-ABI-NEXT:    cmp r0, #0
+; R7-ABI-NEXT:    mov r5, r2
+; R7-ABI-NEXT:    mov r4, r1
+; R7-ABI-NEXT:    it ne
+; R7-ABI-NEXT:    blne knr
+; R7-ABI-NEXT:    adds r0, r5, #7
+; R7-ABI-NEXT:    bic r0, r0, #7
+; R7-ABI-NEXT:    sub.w r0, sp, r0
+; R7-ABI-NEXT:    mov sp, r0
+; R7-ABI-NEXT:    bl take_ptr
+; R7-ABI-NEXT:    mov r0, r4
+; R7-ABI-NEXT:    movs r1, #0
+; R7-ABI-NEXT:    bl __aeabi_fcmpeq
+; R7-ABI-NEXT:    cmp r0, #0
+; R7-ABI-NEXT:    it eq
+; R7-ABI-NEXT:    bleq knr
+; R7-ABI-NEXT:    sub.w r4, r7, #20
+; R7-ABI-NEXT:    mov sp, r4
+; R7-ABI-NEXT:    pop.w {r8, r12}
+; R7-ABI-NEXT:    pop.w {r4, r5, r6, r7, lr}
+; R7-ABI-NEXT:    aut r12, lr, sp
+; R7-ABI-NEXT:    bx lr
+;
+; R11-LABEL: test3:
+; R11:         .cfi_startproc
+; R11-NEXT:  @ %bb.0: @ %entry
+; R11-NEXT:    pac r12, lr, sp
+; R11-NEXT:    .save {r4, r5, r6, r7, r11, ra_auth_code, lr}
+; R11-NEXT:    push.w {r4, r5, r6, r7, r11, r12, lr}
+; R11-NEXT:    .cfi_def_cfa_offset 28
+; R11-NEXT:    .cfi_offset lr, -4
+; R11-NEXT:    .cfi_offset ra_auth_code, -8
+; R11-NEXT:    .cfi_offset r11, -12
+; R11-NEXT:    .cfi_offset r7, -16
+; R11-NEXT:    .cfi_offset r6, -20
+; R11-NEXT:    .cfi_offset r5, -24
+; R11-NEXT:    .cfi_offset r4, -28
+; R11-NEXT:    .setfp r11, sp, #16
+; R11-NEXT:    add.w r11, sp, #16
+; R11-NEXT:    .cfi_def_cfa r11, 12
+; R11-NEXT:    .pad #4
+; R11-NEXT:    sub sp, #4
+; R11-NEXT:    cmp r0, #0
+; R11-NEXT:    mov r5, r2
+; R11-NEXT:    mov r4, r1
+; R11-NEXT:    it ne
+; R11-NEXT:    blne knr
+; R11-NEXT:    adds r0, r5, #7
+; R11-NEXT:    bic r0, r0, #7
+; R11-NEXT:    sub.w r0, sp, r0
+; R11-NEXT:    mov sp, r0
+; R11-NEXT:    bl take_ptr
+; R11-NEXT:    mov r0, r4
+; R11-NEXT:    movs r1, #0
+; R11-NEXT:    bl __aeabi_fcmpeq
+; R11-NEXT:    cmp r0, #0
+; R11-NEXT:    it eq
+; R11-NEXT:    bleq knr
+; R11-NEXT:    sub.w r4, r11, #16
+; R11-NEXT:    mov sp, r4
+; R11-NEXT:    pop.w {r4, r5, r6, r7, r11, r12, lr}
+; R11-NEXT:    aut r12, lr, sp
+; R11-NEXT:    bx lr
+;
+; R11-RES-LABEL: test3:
+; R11-RES:         .cfi_startproc
+; R11-RES-NEXT:  @ %bb.0: @ %entry
+; R11-RES-NEXT:    pac r12, lr, sp
+; R11-RES-NEXT:    .save {r4, r5, r6, r7, ra_auth_code}
+; R11-RES-NEXT:    push.w {r4, r5, r6, r7, r12}
+; R11-RES-NEXT:    .cfi_def_cfa_offset 20
+; R11-RES-NEXT:    .cfi_offset ra_auth_code, -4
+; R11-RES-NEXT:    .cfi_offset r7, -8
+; R11-RES-NEXT:    .cfi_offset r6, -12
+; R11-RES-NEXT:    .cfi_offset r5, -16
+; R11-RES-NEXT:    .cfi_offset r4, -20
+; R11-RES-NEXT:    .save {r11, lr}
+; R11-RES-NEXT:    push.w {r11, lr}
+; R11-RES-NEXT:    .cfi_def_cfa_offset 28
+; R11-RES-NEXT:    .cfi_offset lr, -24
+; R11-RES-NEXT:    .cfi_offset r11, -28
+; R11-RES-NEXT:    .setfp r11, sp
+; R11-RES-NEXT:    mov r11, sp
+; R11-RES-NEXT:    .cfi_def_cfa_register r11
+; R11-RES-NEXT:    .pad #4
+; R11-RES-NEXT:    sub sp, #4
+; R11-RES-NEXT:    cmp r0, #0
+; R11-RES-NEXT:    mov r5, r2
+; R11-RES-NEXT:    mov r4, r1
+; R11-RES-NEXT:    it ne
+; R11-RES-NEXT:    blne knr
+; R11-RES-NEXT:    adds r0, r5, #7
+; R11-RES-NEXT:    bic r0, r0, #7
+; R11-RES-NEXT:    sub.w r0, sp, r0
+; R11-RES-NEXT:    mov sp, r0
+; R11-RES-NEXT:    bl take_ptr
+; R11-RES-NEXT:    mov r0, r4
+; R11-RES-NEXT:    movs r1, #0
+; R11-RES-NEXT:    bl __aeabi_fcmpeq
+; R11-RES-NEXT:    cmp r0, #0
+; R11-RES-NEXT:    it eq
+; R11-RES-NEXT:    bleq knr
+; R11-RES-NEXT:    mov sp, r11
+; R11-RES-NEXT:    pop.w {r11, lr}
+; R11-RES-NEXT:    pop.w {r4, r5, r6, r7, r12}
+; R11-RES-NEXT:    aut r12, lr, sp
+; R11-RES-NEXT:    bx lr
+;
+; R11-ABI-LABEL: test3:
+; R11-ABI:         .cfi_startproc
+; R11-ABI-NEXT:  @ %bb.0: @ %entry
+; R11-ABI-NEXT:    pac r12, lr, sp
+; R11-ABI-NEXT:    .save {r4, r5, r6, r7, ra_auth_code}
+; R11-ABI-NEXT:    push.w {r4, r5, r6, r7, r12}
+; R11-ABI-NEXT:    .cfi_def_cfa_offset 20
+; R11-ABI-NEXT:    .cfi_offset ra_auth_code, -4
+; R11-ABI-NEXT:    .cfi_offset r7, -8
+; R11-ABI-NEXT:    .cfi_offset r6, -12
+; R11-ABI-NEXT:    .cfi_offset r5, -16
+; R11-ABI-NEXT:    .cfi_offset r4, -20
+; R11-ABI-NEXT:    .save {r11, lr}
+; R11-ABI-NEXT:    push.w {r11, lr}
+; R11-ABI-NEXT:    .cfi_def_cfa_offset 28
+; R11-ABI-NEXT:    .cfi_offset lr, -24
+; R11-ABI-NEXT:    .cfi_offset r11, -28
+; R11-ABI-NEXT:    .setfp r11, sp
+; R11-ABI-NEXT:    mov r11, sp
+; R11-ABI-NEXT:    .cfi_def_cfa_register r11
+; R11-ABI-NEXT:    .pad #4
+; R11-ABI-NEXT:    sub sp, #4
+; R11-ABI-NEXT:    cmp r0, #0
+; R11-ABI-NEXT:    mov r5, r2
+; R11-ABI-NEXT:    mov r4, r1
+; R11-ABI-NEXT:    it ne
+; R11-ABI-NEXT:    blne knr
+; R11-ABI-NEXT:    adds r0, r5, #7
+; R11-ABI-NEXT:    bic r0, r0, #7
+; R11-ABI-NEXT:    sub.w r0, sp, r0
+; R11-ABI-NEXT:    mov sp, r0
+; R11-ABI-NEXT:    bl take_ptr
+; R11-ABI-NEXT:    mov r0, r4
+; R11-ABI-NEXT:    movs r1, #0
+; R11-ABI-NEXT:    bl __aeabi_fcmpeq
+; R11-ABI-NEXT:    cmp r0, #0
+; R11-ABI-NEXT:    it eq
+; R11-ABI-NEXT:    bleq knr
+; R11-ABI-NEXT:    mov sp, r11
+; R11-ABI-NEXT:    pop.w {r11, lr}
+; R11-ABI-NEXT:    pop.w {r4, r5, r6, r7, r12}
+; R11-ABI-NEXT:    aut r12, lr, sp
+; R11-ABI-NEXT:    bx lr
 entry:
   %tobool.not = icmp eq i32 %c, 0
   br i1 %tobool.not, label %if.end, label %if.then
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
index 4dfac252e2314ce..615af15e8b5679f 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll
@@ -1,11 +1,30 @@
-; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK1
-; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc %s -o - | FileCheck %s
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-unknown-eabi"
 
 @p = hidden local_unnamed_addr global ptr null, align 4
 
 define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: f:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r5, r6, r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r12, lr}
+; CHECK-NEXT:    mov r7, r3
+; CHECK-NEXT:    mov r5, r2
+; CHECK-NEXT:    mov r6, r1
+; CHECK-NEXT:    bl g
+; CHECK-NEXT:    movw r1, :lower16:p
+; CHECK-NEXT:    mov r2, r5
+; CHECK-NEXT:    movt r1, :upper16:p
+; CHECK-NEXT:    mov r3, r7
+; CHECK-NEXT:    ldr r4, [r1]
+; CHECK-NEXT:    mov r1, r6
+; CHECK-NEXT:    blx r4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r12, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %call = tail call i32 @g(i32 %a) #0
   %0 = load ptr, ptr @p, align 4
@@ -13,19 +32,6 @@ entry:
   ret i32 %call1
 }
 
-; CHECK1-LABEL: f
-; ...
-; CHECK1:       aut r12, lr, sp
-; CHECK1-NOT:   bx r12
-
-; CHECK2-LABEL: f
-; ...
-; CHECK2:       blx r4
-; CHECK2-NEXT:  ldr r12, [sp], #4
-; CHECK2-NEXT:  pop.w {r4, r5, r6, r7, lr}
-; CHECK2-NEXT:  aut r12, lr, sp
-; CHECK2-NEXT:  bx lr
-
 declare dso_local i32 @g(i32) local_unnamed_addr #0
 
 attributes #0 = { nounwind "sign-return-address"="non-leaf"}
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
index 1b13e06546f152b..d02d4b51d73b53e 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 ; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -21,12 +22,43 @@ target triple = "thumbv7m-arm-none-eabi"
 ; }
 
 define hidden i32 @h(i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: h:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    bx lr
 entry:
   %add = add nsw i32 %b, %a
   ret i32 %add
 }
 
 define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: f:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r5, r6, ra_auth_code, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    push.w {r3, r4, r5, r6, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r6, -12
+; CHECK-NEXT:    .cfi_offset r5, -16
+; CHECK-NEXT:    .cfi_offset r4, -20
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bmi .LBB1_2
+; CHECK-NEXT:  @ %bb.1: @ %if.end
+; CHECK-NEXT:    bl OUTLINED_FUNCTION_0
+; CHECK-NEXT:    adds r0, #2
+; CHECK-NEXT:    b .LBB1_3
+; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    mov.w r0, #-1
+; CHECK-NEXT:  .LBB1_3: @ %return
+; CHECK-NEXT:    pop.w {r3, r4, r5, r6, r12, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp slt i32 %a, 0
   br i1 %cmp, label %return, label %if.end
@@ -48,34 +80,32 @@ return:                                           ; preds = %entry, %if.end
   ret i32 %retval.0
 }
 
-; CHECK-LABEL: f:
-; ...
-; CHECK:       pac    r12, lr, sp
-; CHECK-NEXT:  .save  {r4, r5, r6, lr}
-; CHECK-NEXT:  push   {r4, r5, r6, lr}
-; CHECK-NEXT:  .cfi_def_cfa_offset 16
-; CHECK-NEXT:  .cfi_offset lr, -4
-; CHECK-NEXT:  .cfi_offset r6, -8
-; CHECK-NEXT:  .cfi_offset r5, -12
-; CHECK-NEXT:  .cfi_offset r4, -16
-; CHECK-NEXT:  .save  {ra_auth_code}
-; CHECK-NEXT:  str    r12, [sp, #-4]!
-; CHECK-NEXT:  .cfi_def_cfa_offset 20
-; CHECK-NEXT:  .cfi_offset ra_auth_code, -20
-; CHECK-NEXT:  .pad    #4
-; CHECK-NEXT:  sub     sp, #4
-; CHECK-NEXT:  .cfi_def_cfa_offset 24
-; ...
-; CHECK:        bl  OUTLINED_FUNCTION_0
-; ...
-; CHECK:        add    sp, #4
-; CHECK-NEXT:   ldr    r12, [sp], #4
-; CHECK-NEXT:   pop.w  {r4, r5, r6, lr}
-; CHECK-NEXT:   aut    r12, lr, sp
-; CHECK-NEXT:   bx     lr
-
-
 define hidden i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: g:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r5, r6, ra_auth_code, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    push.w {r3, r4, r5, r6, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r6, -12
+; CHECK-NEXT:    .cfi_offset r5, -16
+; CHECK-NEXT:    .cfi_offset r4, -20
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bmi .LBB2_2
+; CHECK-NEXT:  @ %bb.1: @ %if.end
+; CHECK-NEXT:    bl OUTLINED_FUNCTION_0
+; CHECK-NEXT:    adds r0, #1
+; CHECK-NEXT:    b .LBB2_3
+; CHECK-NEXT:  .LBB2_2:
+; CHECK-NEXT:    mov.w r0, #-1
+; CHECK-NEXT:  .LBB2_3: @ %return
+; CHECK-NEXT:    pop.w {r3, r4, r5, r6, r12, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp slt i32 %a, 0
   br i1 %cmp, label %return, label %if.end
@@ -96,30 +126,6 @@ return:                                           ; preds = %entry, %if.end
   %retval.0 = phi i32 [ %add3, %if.end ], [ -1, %entry ]
   ret i32 %retval.0
 }
-; CHECK-LABEL: g:
-; CHECK:       pac    r12, lr, sp
-; CHECK-NEXT:  .save  {r4, r5, r6, lr}
-; CHECK-NEXT:  push   {r4, r5, r6, lr}
-; CHECK-NEXT:  .cfi_def_cfa_offset 16
-; CHECK-NEXT:  .cfi_offset lr, -4
-; CHECK-NEXT:  .cfi_offset r6, -8
-; CHECK-NEXT:  .cfi_offset r5, -12
-; CHECK-NEXT:  .cfi_offset r4, -16
-; CHECK-NEXT:  .save  {ra_auth_code}
-; CHECK-NEXT:  str    r12, [sp, #-4]!
-; CHECK-NEXT:  .cfi_def_cfa_offset 20
-; CHECK-NEXT:  .cfi_offset ra_auth_code, -20
-; CHECK-NEXT:  .pad   #4
-; CHECK-NEXT:  sub    sp, #4
-; CHECK-NEXT:  .cfi_def_cfa_offset 24
-; ...
-; CHECK:        bl  OUTLINED_FUNCTION_0
-; ...
-; CHECK:       add    sp, #4
-; CHECK-NEXT:  ldr    r12, [sp], #4
-; CHECK-NEXT:  pop.w  {r4, r5, r6, lr}
-; CHECK-NEXT:  aut    r12, lr, sp
-; CHECK-NEXT:  bx     lr
 
 ; CHECK-LABEL: OUTLINED_FUNCTION_0:
 ; CHECK:       pac    r12, lr, sp
@@ -147,18 +153,20 @@ attributes #0 = { minsize noinline norecurse nounwind optsize readnone uwtable "
 
 ; UNWIND-LABEL: FunctionAddress: 0x4
 ; UNWIND:       0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0xA2      ; pop {r4, r5, r6}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0xAA      ; pop {r4, r5, r6, lr}
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: FunctionAddress: 0x30
+; UNWIND-LABEL: FunctionAddress: 0x26
 ; UNWIND:       0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0xA2      ; pop {r4, r5, r6}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0xAA      ; pop {r4, r5, r6, lr}
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: FunctionAddress: 0x5C
+; UNWIND-LABEL: FunctionAddress: 0x48
 ; UNWIND:       0xB4      ; pop ra_auth_code
-; UNWIND:       0x84 0x00 ; pop {lr}
- 
-; UNWIND-LABEL: 0000005d {{.*}} OUTLINED_FUNCTION_0
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
+
+; UNWIND-LABEL: 00000049 {{.*}} OUTLINED_FUNCTION_0
 ; UNWIND-LABEL: 00000005 {{.*}} f
-; UNWIND-LABEL: 00000031 {{.*}} g
+; UNWIND-LABEL: 00000027 {{.*}} g
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
index 38c23977b623f9d..8777d517c4badcb 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 ; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -27,6 +28,37 @@ target triple = "thumbv7m-arm-none-eabi"
 @_ZTIi = external dso_local constant ptr
 
 define hidden i32 @_Z1hii(i32 %a, i32 %b) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1hii:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    push.w {r6, r7, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r7, -12
+; CHECK-NEXT:    cmp.w r0, #-1
+; CHECK-NEXT:    ble .LBB0_2
+; CHECK-NEXT:  @ %bb.1: @ %if.end
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    pop.w {r3, r7, r12, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
+; CHECK-NEXT:  .LBB0_2: @ %if.then
+; CHECK-NEXT:    movs r0, #4
+; CHECK-NEXT:    bl __cxa_allocate_exception
+; CHECK-NEXT:    movs r1, #1
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    str r1, [r0]
+; CHECK-NEXT:    ldr r1, .LCPI0_0
+; CHECK-NEXT:    bl __cxa_throw
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.3:
+; CHECK-NEXT:  .LCPI0_0:
+; CHECK-NEXT:    .long _ZTIi
 entry:
   %cmp = icmp slt i32 %a, 0
   br i1 %cmp, label %if.then, label %if.end
@@ -42,31 +74,40 @@ if.end:                                           ; preds = %entry
   ret i32 %add
 }
 
-; CHECK-LABEL: _Z1hii:
-; ...
-; CHECK:    pac    r12, lr, sp
-; CHECK-NEXT:    .save    {r7, lr}
-; CHECK-NEXT:    push    {r7, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 8
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r7, -8
-; CHECK-NEXT:    .save    {ra_auth_code}
-; CHECK-NEXT:    str    r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 12
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -12
-; CHECK-NEXT:    .pad    #4
-; CHECK-NEXT:    sub    sp, #4
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; ...
-; CHECK-NOT: pac
-; CHECK: aut
-; CHECK:     .cfi_endproc
-
 declare dso_local ptr @__cxa_allocate_exception(i32) local_unnamed_addr
 
 declare dso_local void @__cxa_throw(ptr, ptr, ptr) local_unnamed_addr
 
 define hidden i32 @_Z1fiiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1fiiii:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r5, r6, ra_auth_code, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    push.w {r3, r4, r5, r6, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r6, -12
+; CHECK-NEXT:    .cfi_offset r5, -16
+; CHECK-NEXT:    .cfi_offset r4, -20
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bmi .LBB1_2
+; CHECK-NEXT:  @ %bb.1: @ %if.end
+; CHECK-NEXT:    bl OUTLINED_FUNCTION_0
+; CHECK-NEXT:    adds r1, r0, r6
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    adds r1, r4, r5
+; CHECK-NEXT:    sdiv r0, r0, r1
+; CHECK-NEXT:    adds r0, #2
+; CHECK-NEXT:    b .LBB1_3
+; CHECK-NEXT:  .LBB1_2:
+; CHECK-NEXT:    mov.w r0, #-1
+; CHECK-NEXT:  .LBB1_3: @ %return
+; CHECK-NEXT:    pop.w {r3, r4, r5, r6, r12, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp slt i32 %a, 0
   br i1 %cmp, label %return, label %if.end
@@ -85,35 +126,36 @@ return:                                           ; preds = %entry, %if.end
   ret i32 %retval.0
 }
 
-; CHECK-LABEL: _Z1fiiii:
-; ...
-; CHECK:    pac    r12, lr, sp
-; CHECK-NEXT:    .save    {r4, r5, r6, lr}
-; CHECK-NEXT:    push    {r4, r5, r6, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r6, -8
-; CHECK-NEXT:    .cfi_offset r5, -12
-; CHECK-NEXT:    .cfi_offset r4, -16
-; CHECK-NEXT:    .save    {ra_auth_code}
-; CHECK-NEXT:    str    r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
-; CHECK-NEXT:    .pad    #4
-; CHECK-NEXT:    sub    sp, #4
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; ...
-; CHECK:    bl	OUTLINED_FUNCTION_0
-; ...
-; CHECK:    add    sp, #4
-; CHECK-NEXT:    ldr    r12, [sp], #4
-; CHECK-NEXT:    pop.w    {r4, r5, r6, lr}
-; CHECK-NEXT:    aut    r12, lr, sp
-; CHECK-NEXT:    bx    lr
-
-
-
 define hidden i32 @_Z1giiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1giiii:
+; CHECK:         .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r5, r6, ra_auth_code, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    push.w {r3, r4, r5, r6, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r6, -12
+; CHECK-NEXT:    .cfi_offset r5, -16
+; CHECK-NEXT:    .cfi_offset r4, -20
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    bmi .LBB2_2
+; CHECK-NEXT:  @ %bb.1: @ %if.end
+; CHECK-NEXT:    bl OUTLINED_FUNCTION_0
+; CHECK-NEXT:    adds r1, r0, r6
+; CHECK-NEXT:    muls r0, r1, r0
+; CHECK-NEXT:    adds r1, r4, r5
+; CHECK-NEXT:    sdiv r0, r0, r1
+; CHECK-NEXT:    adds r0, #1
+; CHECK-NEXT:    b .LBB2_3
+; CHECK-NEXT:  .LBB2_2:
+; CHECK-NEXT:    mov.w r0, #-1
+; CHECK-NEXT:  .LBB2_3: @ %return
+; CHECK-NEXT:    pop.w {r3, r4, r5, r6, r12, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %cmp = icmp slt i32 %a, 0
   br i1 %cmp, label %return, label %if.end
@@ -132,33 +174,6 @@ return:                                           ; preds = %entry, %if.end
   ret i32 %retval.0
 }
 
-; CHECK-LABEL: _Z1giiii:
-; ...
-; CHECK:    pac    r12, lr, sp
-; CHECK-NEXT:    .save    {r4, r5, r6, lr}
-; CHECK-NEXT:    push    {r4, r5, r6, lr}
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset lr, -4
-; CHECK-NEXT:    .cfi_offset r6, -8
-; CHECK-NEXT:    .cfi_offset r5, -12
-; CHECK-NEXT:    .cfi_offset r4, -16
-; CHECK-NEXT:    .save    {ra_auth_code}
-; CHECK-NEXT:    str    r12, [sp, #-4]!
-; CHECK-NEXT:    .cfi_def_cfa_offset 20
-; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
-; CHECK-NEXT:    .pad    #4
-; CHECK-NEXT:    sub    sp, #4
-; CHECK-NEXT:    .cfi_def_cfa_offset 24
-; ...
-; CHECK:    bl	OUTLINED_FUNCTION_0
-; ...
-; CHECK:    add    sp, #4
-; CHECK-NEXT:    ldr    r12, [sp], #4
-; CHECK-NEXT:    pop.w    {r4, r5, r6, lr}
-; CHECK-NEXT:    aut    r12, lr, sp
-; CHECK-NEXT:    bx    lr
-
-
 ; CHEK-LABEL: OUTLINED_FUNCTION_0:
 ; CHECK-NOT: pac
 ; CHECK-NOT: aut
@@ -177,32 +192,31 @@ attributes #2 = { noreturn "sign-return-address"="non-leaf" }
 
 
 ; UNWIND-LABEL: FunctionAddress: 0x0
-; UNWIND:       Opcodes
+; UNWIND: Opcodes [
 ; UNWIND-NEXT:  0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0x80 0x08 ; pop {r7}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0x84 0x08 ; pop {r7, lr}
-; UNWIND-NEXT:  0xB0      ; finish
-; UNWIND-NEXT:  0xB0      ; finish
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: FunctionAddress: 0x3C
-; UNWIND:       Opcodes
+; UNWIND-LABEL: FunctionAddress: 0x30
+; UNWIND: Opcodes [
 ; UNWIND-NEXT:  0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0xA2      ; pop {r4, r5, r6}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0xAA      ; pop {r4, r5, r6, lr}
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: FunctionAddress: 0x72
-; UNWIND:       Opcodes
+; UNWIND-LABEL: FunctionAddress: 0x5C
+; UNWIND: Opcodes [
 ; UNWIND-NEXT:  0x00      ; vsp = vsp + 4
+; UNWIND-NEXT:  0xA2      ; pop {r4, r5, r6}
 ; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0xAA      ; pop {r4, r5, r6, lr}
+; UNWIND-NEXT:  0x84 0x00 ; pop {lr}
 
-; UNWIND-LABEL: FunctionAddress: 0xA8
-; UNWIND:       Opcodes
-; UNWIND-NEXT:  0xB0      ; finish
-; UNWIND-NEXT:  0xB0      ; finish
+; UNWIND-LABEL: FunctionAddress: 0x88
+; UNWIND: Opcodes [
 ; UNWIND-NEXT:  0xB0      ; finish
 
-; UNWIND: 000000a9 {{.*}} OUTLINED_FUNCTION_0
+; UNWIND: 00000089 {{.*}} OUTLINED_FUNCTION_0
 ; UWNIND: 00000001 {{.*}} _Z1hii
-; UWNIND: 0000003d {{.*}} _Z1fiiii
-; UWNIND: 00000073 {{.*}} _Z1giiii
+; UNWIND: 00000031 {{.*}} _Z1fiiii
+; UNWIND: 0000005d {{.*}} _Z1giiii
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
index 5dce6752c065e1c..5354303a034d4ed 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 ; RUN: llc --filetype=obj %s -o - | llvm-readelf --unwind - | FileCheck %s --check-prefix=UNWIND
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -15,6 +16,39 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 ; }
 
 define hidden i32 @_Z1fv() local_unnamed_addr "sign-return-address"="non-leaf" {
+; CHECK-LABEL: _Z1fv:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r6, r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r4, r6, r7, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 20
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r7, -12
+; CHECK-NEXT:    .cfi_offset r6, -16
+; CHECK-NEXT:    .cfi_offset r4, -20
+; CHECK-NEXT:    .setfp r7, sp, #8
+; CHECK-NEXT:    add r7, sp, #8
+; CHECK-NEXT:    .cfi_def_cfa r7, 12
+; CHECK-NEXT:    .pad #44
+; CHECK-NEXT:    sub sp, #44
+; CHECK-NEXT:    mov r4, sp
+; CHECK-NEXT:    bfc r4, #0, #5
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    mov r1, sp
+; CHECK-NEXT:    movs r0, #4
+; CHECK-NEXT:    bl _Z1giPi
+; CHECK-NEXT:    ldm.w sp, {r0, r1, r2, r3}
+; CHECK-NEXT:    sub.w r4, r7, #8
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    add r0, r2
+; CHECK-NEXT:    add r0, r3
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop.w {r4, r6, r7, r12, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %a = alloca [4 x i32], align 32
   %call = call i32 @_Z1giPi(i32 4, ptr nonnull %a)
@@ -31,29 +65,6 @@ entry:
   ret i32 %add.3
 }
 
-; CHECK-LABEL: _Z1fv:
-; CHECK:      pac     r12, lr, sp
-; CHECK:      .save   {r4, r6, r7, lr}
-; CHECK-NEXT: push    {r4, r6, r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .cfi_offset r6, -12
-; CHECK-NEXT: .cfi_offset r4, -16
-; CHECK-NEXT: .setfp  r7, sp, #8
-; CHECK-NEXT: add     r7, sp, #8
-; CHECK-NEXT: .cfi_def_cfa r7, 8
-; CHECK-NEXT: .save   {ra_auth_code}
-; CHECK-NEXT: str     r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_offset ra_auth_code, -20
-; CHECK-NEXT: .pad    #44
-; CHECK-NEXT: sub     sp, #44
-; CHECK:      ldr     r12, [sp], #4
-; CHECK-NEXT: pop.w   {r4, r6, r7, lr}
-; CHECK-NEXT: aut     r12, lr, sp
-; CHECK-NEXT: bx      lr
-
-
 declare dso_local i32 @_Z1giPi(i32, ptr) local_unnamed_addr
 
 !llvm.module.flags = !{!0, !1, !2}
@@ -64,6 +75,7 @@ declare dso_local i32 @_Z1giPi(i32, ptr) local_unnamed_addr
 
 ; UNWIND-LABEL:        FunctionAddress: 0x0
 ; UNWIND:          0x97      ; vsp = r7
-; UNWIND:          0x42      ; vsp = vsp - 12
-; UNWIND:          0xB4      ; pop ra_auth_code
-; UNWIND:          0x84 0x0D ; pop {r4, r6, r7, lr}
+; UNWIND-NEXT:     0x41      ; vsp = vsp - 8
+; UNWIND-NEXT:     0x80 0x0D ; pop {r4, r6, r7}
+; UNWIND-NEXT:     0xB4      ; pop ra_auth_code
+; UNWIND-NEXT:     0x84 0x00 ; pop {lr}
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll
index cae38b5e4a5a1b6..c0c32de509b75d2 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-stack-arg.ll
@@ -19,17 +19,14 @@ define i32 @test_non_leaf(i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %x) "s
 ; CHECK-LABEL: test_non_leaf:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    pac r12, lr, sp
-; CHECK-NEXT:    .save {r7, lr}
-; CHECK-NEXT:    push {r7, lr}
-; CHECK-NEXT:    .save {ra_auth_code}
-; CHECK-NEXT:    str r12, [sp, #-4]!
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r7, r12, lr}
 ; CHECK-NEXT:    .pad #4
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    bl otherfn
 ; CHECK-NEXT:    ldr r0, [sp, #16]
 ; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    ldr r12, [sp], #4
-; CHECK-NEXT:    pop.w {r7, lr}
+; CHECK-NEXT:    pop.w {r7, r12, lr}
 ; CHECK-NEXT:    aut r12, lr, sp
 ; CHECK-NEXT:    bx lr
 entry:
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
index d027c9e8c7b548f..2b7abfabf7035ab 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-none-eabi"
@@ -5,6 +6,50 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 %"struct.std::__va_list" = type { ptr }
 
 define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1fiz:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .pad #12
+; CHECK-NEXT:    sub sp, #12
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    .save {r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r7, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    .cfi_offset lr, -16
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .cfi_offset r7, -24
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, #4
+; CHECK-NEXT:    .cfi_def_cfa_offset 28
+; CHECK-NEXT:    add.w r12, sp, #16
+; CHECK-NEXT:    cmp r0, #1
+; CHECK-NEXT:    stm.w r12, {r1, r2, r3}
+; CHECK-NEXT:    add r1, sp, #16
+; CHECK-NEXT:    str r1, [sp]
+; CHECK-NEXT:    blt .LBB0_3
+; CHECK-NEXT:  @ %bb.1: @ %for.body.lr.ph
+; CHECK-NEXT:    ldr r1, [sp]
+; CHECK-NEXT:    dls lr, r0
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    adds r1, #4
+; CHECK-NEXT:  .LBB0_2: @ %for.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    str r1, [sp]
+; CHECK-NEXT:    ldr r2, [r1, #-4]
+; CHECK-NEXT:    adds r1, #4
+; CHECK-NEXT:    add r0, r2
+; CHECK-NEXT:    le lr, .LBB0_2
+; CHECK-NEXT:    b .LBB0_4
+; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:  .LBB0_4: @ %for.cond.cleanup
+; CHECK-NEXT:    add sp, #4
+; CHECK-NEXT:    pop.w {r7, r12, lr}
+; CHECK-NEXT:    add sp, #12
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %ap = alloca %"struct.std::__va_list", align 4
   call void @llvm.va_start(ptr nonnull %ap)
@@ -33,34 +78,6 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
 
-; CHECK-LABEL: _Z1fiz:
-; CHECK:      pac    r12, lr, sp
-; CHECK-NEXT: .pad    #12
-; CHECK-NEXT: sub    sp, #12
-; CHECK-NEXT: .cfi_def_cfa_offset 12
-; CHECK-NEXT:  .save    {r7, lr}
-; CHECK-NEXT: push    {r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset lr, -16
-; CHECK-NEXT: .cfi_offset r7, -20
-; CHECK-NEXT: .save    {ra_auth_code}
-; CHECK-NEXT: str    r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 24
-; CHECK-NEXT: .cfi_offset ra_auth_code, -24
-; CHECK-NEXT: .pad    #4
-; CHECK-NEXT: sub    sp, #4
-; CHECK-NEXT: .cfi_def_cfa_offset 28
-; ...
-; CHECK:      add.w r[[N:[0-9]*]], sp, #16
-; CHECK:      stm.w r[[N]], {r1, r2, r3}
-; ...
-; CHECK:      add    sp, #4
-; CHECK-NEXT: ldr    r12, [sp], #4
-; CHECK-NEXT: pop.w    {r7, lr}
-; CHECK-NEXT: add    sp, #12
-; CHECK-NEXT: aut    r12, lr, sp
-; CHECK-NEXT: bx    lr
-
 declare void @llvm.va_start(ptr) #1
 declare void @llvm.va_end(ptr) #1
 
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
index 8019cd5b6109eb3..03b769f256bc28f 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 ; RUN: llc --filetype=obj %s -o - | llvm-readelf --unwind - | FileCheck %s --check-prefix=UNWIND
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
@@ -19,6 +20,50 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 %"struct.std::__va_list" = type { ptr }
 
 define hidden i32 @_Z1fiz(i32 %n, ...) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z1fiz:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .pad #12
+; CHECK-NEXT:    sub sp, #12
+; CHECK-NEXT:    .cfi_def_cfa_offset 12
+; CHECK-NEXT:    .save {r4, r5, r7, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r4, r5, r7, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, -16
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -20
+; CHECK-NEXT:    .cfi_offset r7, -24
+; CHECK-NEXT:    .cfi_offset r5, -28
+; CHECK-NEXT:    .cfi_offset r4, -32
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, #8
+; CHECK-NEXT:    .cfi_def_cfa_offset 40
+; CHECK-NEXT:    mov r4, r0
+; CHECK-NEXT:    add r0, sp, #28
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    cmp r4, #1
+; CHECK-NEXT:    stm r0!, {r1, r2, r3}
+; CHECK-NEXT:    add r0, sp, #28
+; CHECK-NEXT:    str r0, [sp, #4]
+; CHECK-NEXT:    blt .LBB0_2
+; CHECK-NEXT:  .LBB0_1: @ %for.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr r0, [sp, #4]
+; CHECK-NEXT:    adds r1, r0, #4
+; CHECK-NEXT:    str r1, [sp, #4]
+; CHECK-NEXT:    ldr r0, [r0]
+; CHECK-NEXT:    bl _Z1gi
+; CHECK-NEXT:    add r5, r0
+; CHECK-NEXT:    subs r4, #1
+; CHECK-NEXT:    bne .LBB0_1
+; CHECK-NEXT:  .LBB0_2: @ %for.cond.cleanup
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    add sp, #8
+; CHECK-NEXT:    pop.w {r4, r5, r7, r12, lr}
+; CHECK-NEXT:    add sp, #12
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %ap = alloca %"struct.std::__va_list", align 4
   call void @llvm.va_start(ptr nonnull %ap)
@@ -47,36 +92,6 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
 }
 
-; CHECK-LABEL: _Z1fiz:
-; CHECK:       pac    r12, lr, sp
-; CHECK-NEXT:  .pad   #12
-; CHECK-NEXT:  sub    sp, #12
-; CHECK-NEXT:  .cfi_def_cfa_offset 12
-; CHECK-NEXT:  .save   {r4, r5, r7, lr}
-; CHECK-NEXT:  push    {r4, r5, r7, lr}
-; CHECK-NEXT:  .cfi_def_cfa_offset 28
-; CHECK-NEXT:  .cfi_offset lr, -16
-; CHECK-NEXT:  .cfi_offset r7, -20
-; CHECK-NEXT:  .cfi_offset r5, -24
-; CHECK-NEXT:  .cfi_offset r4, -28
-; CHECK-NEXT:  .save  {ra_auth_code}
-; CHECK-NEXT:  str    r12, [sp, #-4]!
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset ra_auth_code, -32
-; CHECK-NEXT:  .pad   #8
-; CHECK-NEXT:  sub    sp, #8
-; CHECK-NEXT: .cfi_def_cfa_offset 40
-; ...
-; CHECK:       add    r[[N:[0-9]*]], sp, #28
-; CHECK:       stm    r[[N]]!, {r1, r2, r3}
-; ...
-; CHECK:       add    sp, #8
-; CHECK-NEXT:  ldr    r12, [sp], #4
-; CHECK-NEXT:  pop.w  {r4, r5, r7, lr}
-; CHECK-NEXT:  add    sp, #12
-; CHECK-NEXT:  aut    r12, lr, sp
-; CHECK-NEXT:  bx     lr
-
 declare void @llvm.va_start(ptr) #1
 declare void @llvm.va_end(ptr) #1
 
@@ -92,7 +107,9 @@ attributes #1 = { nounwind "sign-return-address"="non-leaf"}
 !2 = !{i32 8, !"sign-return-address-all", i32 0}
 
 ; UNWIND-LABEL: FunctionAddress
-; UNWIND:       0x01      ; vsp = vsp + 8
-; UNWIND-NEXT:  0xB4      ; pop ra_auth_code
-; UNWIND-NEXT:  0x84 0x0B ; pop {r4, r5, r7, lr}
-; UNWIND-NEXT:  0x02      ; vsp = vsp + 12
+; UNWIND:      0x01      ; vsp = vsp + 8
+; UNWIND-NEXT: 0x80 0x0B ; pop {r4, r5, r7}
+; UNWIND-NEXT: 0xB4      ; pop ra_auth_code
+; UNWIND-NEXT: 0x84 0x00 ; pop {lr}
+; UNWIND-NEXT: 0x02      ; vsp = vsp + 12
+
diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
index c1d17a7587be058..5eb5990be7c1183 100644
--- a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
+++ b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-none-eabi"
@@ -14,6 +15,89 @@ target triple = "thumbv8.1m.main-arm-none-eabi"
 ; }
 
 define hidden i32 @f(i32 %n) local_unnamed_addr #0 {
+; CHECK-LABEL: f:
+; CHECK:         .cfi_sections .debug_frame
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:  @ %bb.0: @ %entry
+; CHECK-NEXT:    pac r12, lr, sp
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, ra_auth_code, lr}
+; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, r12, lr}
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset lr, -4
+; CHECK-NEXT:    .cfi_offset ra_auth_code, -8
+; CHECK-NEXT:    .cfi_offset r9, -12
+; CHECK-NEXT:    .cfi_offset r8, -16
+; CHECK-NEXT:    .cfi_offset r7, -20
+; CHECK-NEXT:    .cfi_offset r6, -24
+; CHECK-NEXT:    .cfi_offset r5, -28
+; CHECK-NEXT:    .cfi_offset r4, -32
+; CHECK-NEXT:    .setfp r7, sp, #12
+; CHECK-NEXT:    add r7, sp, #12
+; CHECK-NEXT:    .cfi_def_cfa r7, 20
+; CHECK-NEXT:    mov r5, r0
+; CHECK-NEXT:    movs r0, #7
+; CHECK-NEXT:    add.w r0, r0, r5, lsl #2
+; CHECK-NEXT:    bic r0, r0, #7
+; CHECK-NEXT:    sub.w r4, sp, r0
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    mov r0, r5
+; CHECK-NEXT:    mov r1, r4
+; CHECK-NEXT:    bl g
+; CHECK-NEXT:    cmp r5, #1
+; CHECK-NEXT:    blt .LBB0_3
+; CHECK-NEXT:  @ %bb.1: @ %for.body.preheader
+; CHECK-NEXT:    subs r0, r5, #1
+; CHECK-NEXT:    and r12, r5, #3
+; CHECK-NEXT:    cmp r0, #3
+; CHECK-NEXT:    bhs .LBB0_4
+; CHECK-NEXT:  @ %bb.2:
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    b .LBB0_6
+; CHECK-NEXT:  .LBB0_3:
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:    b .LBB0_9
+; CHECK-NEXT:  .LBB0_4: @ %for.body.preheader.new
+; CHECK-NEXT:    bic r0, r5, #3
+; CHECK-NEXT:    movs r2, #1
+; CHECK-NEXT:    subs r0, #4
+; CHECK-NEXT:    sub.w r3, r4, #16
+; CHECK-NEXT:    add.w lr, r2, r0, lsr #2
+; CHECK-NEXT:    movs r2, #0
+; CHECK-NEXT:    movs r0, #0
+; CHECK-NEXT:  .LBB0_5: @ %for.body
+; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldr r5, [r3, #16]!
+; CHECK-NEXT:    adds r2, #4
+; CHECK-NEXT:    add r0, r5
+; CHECK-NEXT:    ldrd r5, r1, [r3, #4]
+; CHECK-NEXT:    ldr r6, [r3, #12]
+; CHECK-NEXT:    add r0, r5
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    add r0, r6
+; CHECK-NEXT:    le lr, .LBB0_5
+; CHECK-NEXT:  .LBB0_6: @ %for.cond.cleanup.loopexit.unr-lcssa
+; CHECK-NEXT:    cmp.w r12, #0
+; CHECK-NEXT:    beq .LBB0_9
+; CHECK-NEXT:  @ %bb.7: @ %for.body.epil
+; CHECK-NEXT:    ldr.w r3, [r4, r2, lsl #2]
+; CHECK-NEXT:    cmp.w r12, #1
+; CHECK-NEXT:    add r0, r3
+; CHECK-NEXT:    beq .LBB0_9
+; CHECK-NEXT:  @ %bb.8: @ %for.body.epil.1
+; CHECK-NEXT:    add.w r2, r4, r2, lsl #2
+; CHECK-NEXT:    cmp.w r12, #2
+; CHECK-NEXT:    ldr r1, [r2, #4]
+; CHECK-NEXT:    add r0, r1
+; CHECK-NEXT:    itt ne
+; CHECK-NEXT:    ldrne r1, [r2, #8]
+; CHECK-NEXT:    addne r0, r1
+; CHECK-NEXT:  .LBB0_9: @ %for.cond.cleanup
+; CHECK-NEXT:    sub.w r4, r7, #12
+; CHECK-NEXT:    mov sp, r4
+; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r12, lr}
+; CHECK-NEXT:    aut r12, lr, sp
+; CHECK-NEXT:    bx lr
 entry:
   %vla = alloca i32, i32 %n, align 4
   %call = call i32 @g(i32 %n, ptr nonnull %vla) #0
@@ -88,32 +172,6 @@ for.body.epil.2:                                  ; preds = %for.body.epil.1
   br label %for.cond.cleanup
 }
 
-; CHECK-LABEL: f:
-; CHECK:       pac    r12, lr, sp
-; CHECK-NEXT: .save   {r4, r5, r6, r7, lr}
-; CHECK-NEXT: push    {r4, r5, r6, r7, lr}
-; CHECK-NEXT: .cfi_def_cfa_offset 20
-; CHECK-NEXT: .cfi_offset lr, -4
-; CHECK-NEXT: .cfi_offset r7, -8
-; CHECK-NEXT: .cfi_offset r6, -12
-; CHECK-NEXT: .cfi_offset r5, -16
-; CHECK-NEXT: .cfi_offset r4, -20
-; CHECK-NEXT: .setfp r7, sp, #12
-; CHECK-NEXT: add    r7, sp, #12
-; CHECK-NEXT: .cfi_def_cfa r7, 8
-; CHECK-NEXT: .save    {r8, r9, ra_auth_code}
-; CHECK-NEXT: push.w   {r8, r9, r12}
-; CHECK-NEXT: .cfi_offset ra_auth_code, -24
-; CHECK-NEXT: .cfi_offset r9, -28
-; CHECK-NEXT: .cfi_offset r8, -32
-; ...
-; CHECK:      sub.w  r[[N:[0-9]*]], r7, #24
-; CHECK-NEXT: mov    sp, r[[N]]
-; CHECK-NEXT: pop.w  {r8, r9, r12}
-; CHECK-NEXT: pop.w  {r4, r5, r6, r7, lr}
-; CHECK-NEXT: aut    r12, lr, sp
-; CHECK-NEXT: bx     lr
-
 declare dso_local i32 @g(i32, ptr) local_unnamed_addr #0
 
 attributes #0 = { nounwind "sign-return-address"="non-leaf"}
diff --git a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll
index cef92f459e4aa37..24a08267db6fbf7 100644
--- a/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll
+++ b/llvm/test/CodeGen/WebAssembly/cfg-stackify-eh-legacy.ll
@@ -1,9 +1,9 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s
-; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling
-; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling | FileCheck %s --check-prefix=NOOPT
-; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-disable-ehpad-sort -stats 2>&1 | FileCheck %s --check-prefix=NOSORT
-; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT-LOCALS
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,bulk-memory | FileCheck %s
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,bulk-memory
+; RUN: llc < %s -O0 -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -verify-machineinstrs -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory | FileCheck %s --check-prefix=NOOPT
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory -wasm-disable-ehpad-sort -stats 2>&1 | FileCheck %s --check-prefix=NOSORT
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -disable-block-placement -verify-machineinstrs -fast-isel=false -machine-sink-split-probability-threshold=0 -cgp-freq-ratio-to-skip-merge=1000 -wasm-enable-eh -exception-model=wasm -mattr=+exception-handling,-bulk-memory -wasm-disable-ehpad-sort | FileCheck %s --check-prefix=NOSORT-LOCALS
 
 target triple = "wasm32-unknown-unknown"
 
diff --git a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll
index 77d1564409f78cc..ba10dd94a9838dc 100644
--- a/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll
+++ b/llvm/test/CodeGen/WebAssembly/target-features-cpus.ll
@@ -13,7 +13,10 @@ target triple = "wasm32-unknown-unknown"
 
 ; generic: +multivalue, +mutable-globals, +reference-types, +sign-ext
 ; GENERIC-LABEL: .custom_section.target_features,"",@
-; GENERIC-NEXT: .int8  4
+; GENERIC-NEXT: .int8  6
+; GENERIC-NEXT: .int8  43
+; GENERIC-NEXT: .int8  11
+; GENERIC-NEXT: .ascii  "bulk-memory"
 ; GENERIC-NEXT: .int8  43
 ; GENERIC-NEXT: .int8  10
 ; GENERIC-NEXT: .ascii  "multivalue"
@@ -21,6 +24,9 @@ target triple = "wasm32-unknown-unknown"
 ; GENERIC-NEXT: .int8  15
 ; GENERIC-NEXT: .ascii  "mutable-globals"
 ; GENERIC-NEXT: .int8  43
+; GENERIC-NEXT: .int8  19
+; GENERIC-NEXT: .ascii  "nontrapping-fptoint"
+; GENERIC-NEXT: .int8  43
 ; GENERIC-NEXT: .int8  15
 ; GENERIC-NEXT: .ascii  "reference-types"
 ; GENERIC-NEXT: .int8  43
diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll
index 101e4ed008f7b6c..fc573fbd4fc99d3 100644
--- a/llvm/test/CodeGen/X86/andnot-patterns.ll
+++ b/llvm/test/CodeGen/X86/andnot-patterns.ll
@@ -7,47 +7,80 @@
 ; TODO - PR112425 - attempt to reconstruct andnot patterns through bitwise-agnostic operations
 
 declare void @use_i64(i64)
+declare void @use_i32(i32)
 
 ;
 ; Fold (and X, (rotl (not Y), Z))) -> (and X, (not (rotl Y, Z)))
 ;
 
 define i64 @andnot_rotl_i64(i64 %a0, i64 %a1, i64 %a2) nounwind {
-; X86-LABEL: andnot_rotl_i64:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    notl %esi
-; X86-NEXT:    notl %edx
-; X86-NEXT:    testb $32, %cl
-; X86-NEXT:    jne .LBB0_1
-; X86-NEXT:  # %bb.2:
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    jmp .LBB0_3
-; X86-NEXT:  .LBB0_1:
-; X86-NEXT:    movl %esi, %eax
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:  .LBB0_3:
-; X86-NEXT:    movl %esi, %edx
-; X86-NEXT:    shldl %cl, %eax, %edx
-; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NEXT:    shldl %cl, %esi, %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: andnot_rotl_i64:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    jne .LBB0_1
+; X86-NOBMI-NEXT:  # %bb.2:
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:    jmp .LBB0_3
+; X86-NOBMI-NEXT:  .LBB0_1:
+; X86-NOBMI-NEXT:    movl %esi, %edx
+; X86-NOBMI-NEXT:    movl %eax, %esi
+; X86-NOBMI-NEXT:  .LBB0_3:
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    shldl %cl, %edx, %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI-NEXT:    shldl %cl, %esi, %edx
+; X86-NOBMI-NEXT:    notl %edx
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: andnot_rotl_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    notq %rax
-; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NEXT:    rolq %cl, %rax
-; X64-NEXT:    andq %rdi, %rax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: andnot_rotl_i64:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    testb $32, %cl
+; X86-BMI-NEXT:    jne .LBB0_1
+; X86-BMI-NEXT:  # %bb.2:
+; X86-BMI-NEXT:    movl %eax, %esi
+; X86-BMI-NEXT:    jmp .LBB0_3
+; X86-BMI-NEXT:  .LBB0_1:
+; X86-BMI-NEXT:    movl %edx, %esi
+; X86-BMI-NEXT:    movl %eax, %edx
+; X86-BMI-NEXT:  .LBB0_3:
+; X86-BMI-NEXT:    movl %edx, %eax
+; X86-BMI-NEXT:    shldl %cl, %esi, %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI-NEXT:    shldl %cl, %edx, %esi
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %edx
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_rotl_i64:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rdx, %rcx
+; X64-NOBMI-NEXT:    movq %rsi, %rax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    rolq %cl, %rax
+; X64-NOBMI-NEXT:    notq %rax
+; X64-NOBMI-NEXT:    andq %rdi, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_rotl_i64:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    movq %rdx, %rcx
+; X64-BMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI-NEXT:    rolq %cl, %rsi
+; X64-BMI-NEXT:    andnq %rdi, %rsi, %rax
+; X64-BMI-NEXT:    retq
   %not = xor i64 %a1, -1
   %rot = tail call i64 @llvm.fshl.i64(i64 %not, i64 %not, i64 %a2)
   %and = and i64 %rot, %a0
@@ -55,24 +88,40 @@ define i64 @andnot_rotl_i64(i64 %a0, i64 %a1, i64 %a2) nounwind {
 }
 
 define i32 @andnot_rotl_i32(i32 %a0, i32 %a1, i32 %a2) nounwind {
-; X86-LABEL: andnot_rotl_i32:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
-; X86-NEXT:    roll %cl, %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: andnot_rotl_i32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    roll %cl, %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: andnot_rotl_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notl %eax
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    roll %cl, %eax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: andnot_rotl_i32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    roll %cl, %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_rotl_i32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edx, %ecx
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    roll %cl, %eax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_rotl_i32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    movl %edx, %ecx
+; X64-BMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI-NEXT:    roll %cl, %esi
+; X64-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-BMI-NEXT:    retq
   %not = xor i32 %a1, -1
   %rot = tail call i32 @llvm.fshl.i32(i32 %not, i32 %not, i32 %a2)
   %and = and i32 %rot, %a0
@@ -83,23 +132,32 @@ define i16 @andnot_rotl_i16(i16 %a0, i16 %a1, i16 %a2) nounwind {
 ; X86-LABEL: andnot_rotl_i16:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rolw %cl, %ax
+; X86-NEXT:    notl %eax
 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: andnot_rotl_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notl %eax
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    rolw %cl, %ax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
-; X64-NEXT:    retq
+; X64-NOBMI-LABEL: andnot_rotl_i16:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edx, %ecx
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    rolw %cl, %ax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_rotl_i16:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    movl %edx, %ecx
+; X64-BMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI-NEXT:    rolw %cl, %si
+; X64-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-BMI-NEXT:    retq
   %not = xor i16 %a1, -1
   %rot = tail call i16 @llvm.fshl.i16(i16 %not, i16 %not, i16 %a2)
   %and = and i16 %rot, %a0
@@ -111,8 +169,8 @@ define i8 @andnot_rotl_i8(i8 %a0, i8 %a1, i8 %a2) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notb %al
 ; X86-NEXT:    rolb %cl, %al
+; X86-NEXT:    notb %al
 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
 ;
@@ -120,9 +178,9 @@ define i8 @andnot_rotl_i8(i8 %a0, i8 %a1, i8 %a2) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
 ; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notb %al
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rolb %cl, %al
+; X64-NEXT:    notb %al
 ; X64-NEXT:    andb %dil, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
@@ -132,8 +190,8 @@ define i8 @andnot_rotl_i8(i8 %a0, i8 %a1, i8 %a2) nounwind {
   ret i8 %and
 }
 
-define i64 @andnot_rotl_i64_multiuse(i64 %a0, i64 %a1, i64 %a2) nounwind {
-; X86-LABEL: andnot_rotl_i64_multiuse:
+define i64 @andnot_rotl_i64_multiuse_rot(i64 %a0, i64 %a1, i64 %a2) nounwind {
+; X86-LABEL: andnot_rotl_i64_multiuse_rot:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
@@ -171,7 +229,7 @@ define i64 @andnot_rotl_i64_multiuse(i64 %a0, i64 %a1, i64 %a2) nounwind {
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: andnot_rotl_i64_multiuse:
+; X64-LABEL: andnot_rotl_i64_multiuse_rot:
 ; X64:       # %bb.0:
 ; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    movq %rdx, %rcx
@@ -197,41 +255,73 @@ define i64 @andnot_rotl_i64_multiuse(i64 %a0, i64 %a1, i64 %a2) nounwind {
 ;
 
 define i64 @andnot_rotr_i64(i64 %a0, i64 %a1, i64 %a2) nounwind {
-; X86-LABEL: andnot_rotr_i64:
-; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    notl %esi
-; X86-NEXT:    notl %edx
-; X86-NEXT:    testb $32, %cl
-; X86-NEXT:    je .LBB5_1
-; X86-NEXT:  # %bb.2:
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    jmp .LBB5_3
-; X86-NEXT:  .LBB5_1:
-; X86-NEXT:    movl %esi, %eax
-; X86-NEXT:    movl %edx, %esi
-; X86-NEXT:  .LBB5_3:
-; X86-NEXT:    movl %esi, %edx
-; X86-NEXT:    shrdl %cl, %eax, %edx
-; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X86-NEXT:    shrdl %cl, %esi, %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    popl %esi
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: andnot_rotr_i64:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    testb $32, %cl
+; X86-NOBMI-NEXT:    je .LBB5_1
+; X86-NOBMI-NEXT:  # %bb.2:
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:    jmp .LBB5_3
+; X86-NOBMI-NEXT:  .LBB5_1:
+; X86-NOBMI-NEXT:    movl %esi, %edx
+; X86-NOBMI-NEXT:    movl %eax, %esi
+; X86-NOBMI-NEXT:  .LBB5_3:
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    shrdl %cl, %edx, %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NOBMI-NEXT:    shrdl %cl, %esi, %edx
+; X86-NOBMI-NEXT:    notl %edx
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: andnot_rotr_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    notq %rax
-; X64-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-NEXT:    rorq %cl, %rax
-; X64-NEXT:    andq %rdi, %rax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: andnot_rotr_i64:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    testb $32, %cl
+; X86-BMI-NEXT:    je .LBB5_1
+; X86-BMI-NEXT:  # %bb.2:
+; X86-BMI-NEXT:    movl %eax, %esi
+; X86-BMI-NEXT:    jmp .LBB5_3
+; X86-BMI-NEXT:  .LBB5_1:
+; X86-BMI-NEXT:    movl %edx, %esi
+; X86-BMI-NEXT:    movl %eax, %edx
+; X86-BMI-NEXT:  .LBB5_3:
+; X86-BMI-NEXT:    movl %edx, %eax
+; X86-BMI-NEXT:    shrdl %cl, %esi, %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-BMI-NEXT:    shrdl %cl, %edx, %esi
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %esi, %edx
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_rotr_i64:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rdx, %rcx
+; X64-NOBMI-NEXT:    movq %rsi, %rax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-NOBMI-NEXT:    rorq %cl, %rax
+; X64-NOBMI-NEXT:    notq %rax
+; X64-NOBMI-NEXT:    andq %rdi, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_rotr_i64:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    movq %rdx, %rcx
+; X64-BMI-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-BMI-NEXT:    rorq %cl, %rsi
+; X64-BMI-NEXT:    andnq %rdi, %rsi, %rax
+; X64-BMI-NEXT:    retq
   %not = xor i64 %a1, -1
   %rot = tail call i64 @llvm.fshr.i64(i64 %not, i64 %not, i64 %a2)
   %and = and i64 %rot, %a0
@@ -239,24 +329,40 @@ define i64 @andnot_rotr_i64(i64 %a0, i64 %a1, i64 %a2) nounwind {
 }
 
 define i32 @andnot_rotr_i32(i32 %a0, i32 %a1, i32 %a2) nounwind {
-; X86-LABEL: andnot_rotr_i32:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
-; X86-NEXT:    rorl %cl, %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: andnot_rotr_i32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    rorl %cl, %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: andnot_rotr_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notl %eax
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    rorl %cl, %eax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: andnot_rotr_i32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    rorl %cl, %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_rotr_i32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edx, %ecx
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    rorl %cl, %eax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_rotr_i32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    movl %edx, %ecx
+; X64-BMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI-NEXT:    rorl %cl, %esi
+; X64-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-BMI-NEXT:    retq
   %not = xor i32 %a1, -1
   %rot = tail call i32 @llvm.fshr.i32(i32 %not, i32 %not, i32 %a2)
   %and = and i32 %rot, %a0
@@ -267,23 +373,32 @@ define i16 @andnot_rotr_i16(i16 %a0, i16 %a1, i16 %a2) nounwind {
 ; X86-LABEL: andnot_rotr_i16:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rorw %cl, %ax
+; X86-NEXT:    notl %eax
 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: andnot_rotr_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notl %eax
-; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    rorw %cl, %ax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
-; X64-NEXT:    retq
+; X64-NOBMI-LABEL: andnot_rotr_i16:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edx, %ecx
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    rorw %cl, %ax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_rotr_i16:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    movl %edx, %ecx
+; X64-BMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI-NEXT:    rorw %cl, %si
+; X64-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-BMI-NEXT:    retq
   %not = xor i16 %a1, -1
   %rot = tail call i16 @llvm.fshr.i16(i16 %not, i16 %not, i16 %a2)
   %and = and i16 %rot, %a0
@@ -295,8 +410,8 @@ define i8 @andnot_rotr_i8(i8 %a0, i8 %a1, i8 %a2) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notb %al
 ; X86-NEXT:    rorb %cl, %al
+; X86-NEXT:    notb %al
 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
 ;
@@ -304,9 +419,9 @@ define i8 @andnot_rotr_i8(i8 %a0, i8 %a1, i8 %a2) nounwind {
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
 ; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notb %al
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
 ; X64-NEXT:    rorb %cl, %al
+; X64-NEXT:    notb %al
 ; X64-NEXT:    andb %dil, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
@@ -316,30 +431,115 @@ define i8 @andnot_rotr_i8(i8 %a0, i8 %a1, i8 %a2) nounwind {
   ret i8 %and
 }
 
+define i32 @andnot_rotr_i32_multiuse_not(i32 %a0, i32 %a1, i32 %a2) nounwind {
+; X86-NOBMI-LABEL: andnot_rotr_i32_multiuse_not:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    movl %eax, %esi
+; X86-NOBMI-NEXT:    rorl %cl, %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    pushl %eax
+; X86-NOBMI-NEXT:    calll use_i32@PLT
+; X86-NOBMI-NEXT:    addl $4, %esp
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: andnot_rotr_i32_multiuse_not:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl %eax, %edx
+; X86-BMI-NEXT:    notl %edx
+; X86-BMI-NEXT:    rorl %cl, %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %esi
+; X86-BMI-NEXT:    pushl %edx
+; X86-BMI-NEXT:    calll use_i32@PLT
+; X86-BMI-NEXT:    addl $4, %esp
+; X86-BMI-NEXT:    movl %esi, %eax
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_rotr_i32_multiuse_not:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    movl %edx, %ecx
+; X64-NOBMI-NEXT:    notl %esi
+; X64-NOBMI-NEXT:    movl %esi, %ebx
+; X64-NOBMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NOBMI-NEXT:    rorl %cl, %ebx
+; X64-NOBMI-NEXT:    andl %edi, %ebx
+; X64-NOBMI-NEXT:    movl %esi, %edi
+; X64-NOBMI-NEXT:    callq use_i32@PLT
+; X64-NOBMI-NEXT:    movl %ebx, %eax
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_rotr_i32_multiuse_not:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    pushq %rbx
+; X64-BMI-NEXT:    movl %edx, %ecx
+; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    notl %eax
+; X64-BMI-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-BMI-NEXT:    rorl %cl, %esi
+; X64-BMI-NEXT:    andnl %edi, %esi, %ebx
+; X64-BMI-NEXT:    movl %eax, %edi
+; X64-BMI-NEXT:    callq use_i32@PLT
+; X64-BMI-NEXT:    movl %ebx, %eax
+; X64-BMI-NEXT:    popq %rbx
+; X64-BMI-NEXT:    retq
+  %not = xor i32 %a1, -1
+  %rot = tail call i32 @llvm.fshr.i32(i32 %not, i32 %not, i32 %a2)
+  %and = and i32 %rot, %a0
+  call void @use_i32(i32 %not)
+  ret i32 %and
+}
+
 ;
 ; Fold (and X, (bswap (not Y)))) -> (and X, (not (bswap Y)))
 ;
 
 define i64 @andnot_bswap_i64(i64 %a0, i64 %a1) nounwind {
-; X86-LABEL: andnot_bswap_i64:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
-; X86-NEXT:    notl %edx
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: andnot_bswap_i64:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    bswapl %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    bswapl %edx
+; X86-NOBMI-NEXT:    notl %edx
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: andnot_bswap_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    notq %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    andq %rdi, %rax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: andnot_bswap_i64:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    bswapl %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    bswapl %ecx
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_bswap_i64:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rax
+; X64-NOBMI-NEXT:    bswapq %rax
+; X64-NOBMI-NEXT:    notq %rax
+; X64-NOBMI-NEXT:    andq %rdi, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bswap_i64:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    bswapq %rsi
+; X64-BMI-NEXT:    andnq %rdi, %rsi, %rax
+; X64-BMI-NEXT:    retq
   %not = xor i64 %a1, -1
   %bswap = tail call i64 @llvm.bswap.i64(i64 %not)
   %and = and i64 %bswap, %a0
@@ -347,21 +547,34 @@ define i64 @andnot_bswap_i64(i64 %a0, i64 %a1) nounwind {
 }
 
 define i32 @andnot_bswap_i32(i32 %a0, i32 %a1) nounwind {
-; X86-LABEL: andnot_bswap_i32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: andnot_bswap_i32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    bswapl %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: andnot_bswap_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notl %eax
-; X64-NEXT:    bswapl %eax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: andnot_bswap_i32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    bswapl %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_bswap_i32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    bswapl %eax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bswap_i32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    bswapl %esi
+; X64-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-BMI-NEXT:    retq
   %not = xor i32 %a1, -1
   %bswap = tail call i32 @llvm.bswap.i32(i32 %not)
   %and = and i32 %bswap, %a0
@@ -371,101 +584,273 @@ define i32 @andnot_bswap_i32(i32 %a0, i32 %a1) nounwind {
 define i16 @andnot_bswap_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-LABEL: andnot_bswap_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rolw $8, %ax
+; X86-NEXT:    notl %eax
 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: andnot_bswap_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notl %eax
-; X64-NEXT:    rolw $8, %ax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
-; X64-NEXT:    retq
+; X64-NOBMI-LABEL: andnot_bswap_i16:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    rolw $8, %ax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bswap_i16:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    rolw $8, %si
+; X64-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-BMI-NEXT:    retq
   %not = xor i16 %a1, -1
   %bswap = tail call i16 @llvm.bswap.i16(i16 %not)
   %and = and i16 %bswap, %a0
   ret i16 %and
 }
 
-;
-; Fold (and X, (bitreverse (not Y)))) -> (and X, (not (bitreverse Y)))
-;
-
-define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
-; X86-LABEL: andnot_bitreverse_i64:
+define i32 @andnot_bswap_i32_multiuse_bswap(i32 %a0, i32 %a1) nounwind {
+; X86-LABEL: andnot_bswap_i32_multiuse_bswap:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    notl %eax
-; X86-NEXT:    notl %ecx
-; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %edx
-; X86-NEXT:    shrl $4, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    orl %edx, %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    leal (%ecx,%edx,4), %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
-; X86-NEXT:    shrl %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    leal (%ecx,%edx,2), %edx
 ; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    andl %eax, %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll use_i32@PLT
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: andnot_bitreverse_i64:
+; X64-LABEL: andnot_bswap_i32_multiuse_bswap:
 ; X64:       # %bb.0:
-; X64-NEXT:    notq %rsi
-; X64-NEXT:    bswapq %rsi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    shrq $4, %rax
-; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT:    andq %rcx, %rax
-; X64-NEXT:    andq %rcx, %rsi
-; X64-NEXT:    shlq $4, %rsi
-; X64-NEXT:    orq %rax, %rsi
-; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
-; X64-NEXT:    movq %rsi, %rcx
-; X64-NEXT:    andq %rax, %rcx
-; X64-NEXT:    shrq $2, %rsi
-; X64-NEXT:    andq %rax, %rsi
-; X64-NEXT:    leaq (%rsi,%rcx,4), %rax
-; X64-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-NEXT:    movq %rax, %rdx
-; X64-NEXT:    andq %rcx, %rdx
-; X64-NEXT:    shrq %rax
-; X64-NEXT:    andq %rcx, %rax
-; X64-NEXT:    leaq (%rax,%rdx,2), %rax
-; X64-NEXT:    andq %rdi, %rax
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    notl %esi
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    andl %esi, %ebx
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    callq use_i32@PLT
+; X64-NEXT:    movl %ebx, %eax
+; X64-NEXT:    popq %rbx
 ; X64-NEXT:    retq
+  %not = xor i32 %a1, -1
+  %bswap = tail call i32 @llvm.bswap.i32(i32 %not)
+  %and = and i32 %bswap, %a0
+  call void @use_i32(i32 %bswap)
+  ret i32 %and
+}
+
+define i32 @andnot_bswap_i32_multiuse_not(i32 %a0, i32 %a1) nounwind {
+; X86-NOBMI-LABEL: andnot_bswap_i32_multiuse_not:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    movl %eax, %esi
+; X86-NOBMI-NEXT:    bswapl %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    pushl %eax
+; X86-NOBMI-NEXT:    calll use_i32@PLT
+; X86-NOBMI-NEXT:    addl $4, %esp
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: andnot_bswap_i32_multiuse_not:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl %eax, %ecx
+; X86-BMI-NEXT:    notl %ecx
+; X86-BMI-NEXT:    bswapl %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %esi
+; X86-BMI-NEXT:    pushl %ecx
+; X86-BMI-NEXT:    calll use_i32@PLT
+; X86-BMI-NEXT:    addl $4, %esp
+; X86-BMI-NEXT:    movl %esi, %eax
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_bswap_i32_multiuse_not:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    notl %esi
+; X64-NOBMI-NEXT:    movl %esi, %ebx
+; X64-NOBMI-NEXT:    bswapl %ebx
+; X64-NOBMI-NEXT:    andl %edi, %ebx
+; X64-NOBMI-NEXT:    movl %esi, %edi
+; X64-NOBMI-NEXT:    callq use_i32@PLT
+; X64-NOBMI-NEXT:    movl %ebx, %eax
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bswap_i32_multiuse_not:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    pushq %rbx
+; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    notl %eax
+; X64-BMI-NEXT:    bswapl %esi
+; X64-BMI-NEXT:    andnl %edi, %esi, %ebx
+; X64-BMI-NEXT:    movl %eax, %edi
+; X64-BMI-NEXT:    callq use_i32@PLT
+; X64-BMI-NEXT:    movl %ebx, %eax
+; X64-BMI-NEXT:    popq %rbx
+; X64-BMI-NEXT:    retq
+  %not = xor i32 %a1, -1
+  %bswap = tail call i32 @llvm.bswap.i32(i32 %not)
+  %and = and i32 %bswap, %a0
+  call void @use_i32(i32 %not)
+  ret i32 %and
+}
+
+;
+; Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
+;
+
+define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
+; X86-NOBMI-LABEL: andnot_bitreverse_i64:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    bswapl %eax
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    shll $4, %edx
+; X86-NOBMI-NEXT:    shrl $4, %eax
+; X86-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    orl %edx, %eax
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NOBMI-NEXT:    shrl $2, %eax
+; X86-NOBMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NOBMI-NEXT:    leal (%eax,%edx,4), %eax
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NOBMI-NEXT:    shrl %eax
+; X86-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NOBMI-NEXT:    leal (%eax,%edx,2), %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    bswapl %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %edx
+; X86-NOBMI-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    shll $4, %edx
+; X86-NOBMI-NEXT:    shrl $4, %ecx
+; X86-NOBMI-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    orl %edx, %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %edx
+; X86-NOBMI-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NOBMI-NEXT:    shrl $2, %ecx
+; X86-NOBMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NOBMI-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %edx
+; X86-NOBMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NOBMI-NEXT:    shrl %ecx
+; X86-NOBMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NOBMI-NEXT:    leal (%ecx,%edx,2), %edx
+; X86-NOBMI-NEXT:    notl %edx
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: andnot_bitreverse_i64:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    bswapl %eax
+; X86-BMI-NEXT:    movl %eax, %edx
+; X86-BMI-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-BMI-NEXT:    shll $4, %edx
+; X86-BMI-NEXT:    shrl $4, %eax
+; X86-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-BMI-NEXT:    orl %edx, %eax
+; X86-BMI-NEXT:    movl %eax, %edx
+; X86-BMI-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-BMI-NEXT:    shrl $2, %eax
+; X86-BMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-BMI-NEXT:    leal (%eax,%edx,4), %eax
+; X86-BMI-NEXT:    movl %eax, %edx
+; X86-BMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-BMI-NEXT:    shrl %eax
+; X86-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-BMI-NEXT:    leal (%eax,%edx,2), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    bswapl %ecx
+; X86-BMI-NEXT:    movl %ecx, %edx
+; X86-BMI-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-BMI-NEXT:    shll $4, %edx
+; X86-BMI-NEXT:    shrl $4, %ecx
+; X86-BMI-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-BMI-NEXT:    orl %edx, %ecx
+; X86-BMI-NEXT:    movl %ecx, %edx
+; X86-BMI-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-BMI-NEXT:    shrl $2, %ecx
+; X86-BMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-BMI-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-BMI-NEXT:    movl %ecx, %edx
+; X86-BMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-BMI-NEXT:    shrl %ecx
+; X86-BMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-BMI-NEXT:    leal (%ecx,%edx,2), %ecx
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_bitreverse_i64:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    bswapq %rsi
+; X64-NOBMI-NEXT:    movq %rsi, %rax
+; X64-NOBMI-NEXT:    shrq $4, %rax
+; X64-NOBMI-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-NOBMI-NEXT:    andq %rcx, %rax
+; X64-NOBMI-NEXT:    andq %rcx, %rsi
+; X64-NOBMI-NEXT:    shlq $4, %rsi
+; X64-NOBMI-NEXT:    orq %rax, %rsi
+; X64-NOBMI-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    andq %rax, %rcx
+; X64-NOBMI-NEXT:    shrq $2, %rsi
+; X64-NOBMI-NEXT:    andq %rax, %rsi
+; X64-NOBMI-NEXT:    leaq (%rsi,%rcx,4), %rax
+; X64-NOBMI-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NOBMI-NEXT:    movq %rax, %rdx
+; X64-NOBMI-NEXT:    andq %rcx, %rdx
+; X64-NOBMI-NEXT:    shrq %rax
+; X64-NOBMI-NEXT:    andq %rcx, %rax
+; X64-NOBMI-NEXT:    leaq (%rax,%rdx,2), %rax
+; X64-NOBMI-NEXT:    notq %rax
+; X64-NOBMI-NEXT:    andq %rdi, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bitreverse_i64:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    bswapq %rsi
+; X64-BMI-NEXT:    movq %rsi, %rax
+; X64-BMI-NEXT:    shrq $4, %rax
+; X64-BMI-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-BMI-NEXT:    andq %rcx, %rax
+; X64-BMI-NEXT:    andq %rcx, %rsi
+; X64-BMI-NEXT:    shlq $4, %rsi
+; X64-BMI-NEXT:    orq %rax, %rsi
+; X64-BMI-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-BMI-NEXT:    movq %rsi, %rcx
+; X64-BMI-NEXT:    andq %rax, %rcx
+; X64-BMI-NEXT:    shrq $2, %rsi
+; X64-BMI-NEXT:    andq %rax, %rsi
+; X64-BMI-NEXT:    leaq (%rsi,%rcx,4), %rax
+; X64-BMI-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-BMI-NEXT:    movq %rax, %rdx
+; X64-BMI-NEXT:    andq %rcx, %rdx
+; X64-BMI-NEXT:    shrq %rax
+; X64-BMI-NEXT:    andq %rcx, %rax
+; X64-BMI-NEXT:    leaq (%rax,%rdx,2), %rax
+; X64-BMI-NEXT:    andnq %rdi, %rax, %rax
+; X64-BMI-NEXT:    retq
   %not = xor i64 %a1, -1
   %bitrev = tail call i64 @llvm.bitreverse.i64(i64 %not)
   %and = and i64 %bitrev, %a0
@@ -473,53 +858,99 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 }
 
 define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
-; X86-LABEL: andnot_bitreverse_i32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: andnot_bitreverse_i32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    bswapl %eax
+; X86-NOBMI-NEXT:    movl %eax, %ecx
+; X86-NOBMI-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    shll $4, %ecx
+; X86-NOBMI-NEXT:    shrl $4, %eax
+; X86-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    orl %ecx, %eax
+; X86-NOBMI-NEXT:    movl %eax, %ecx
+; X86-NOBMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NOBMI-NEXT:    shrl $2, %eax
+; X86-NOBMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NOBMI-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NOBMI-NEXT:    movl %eax, %ecx
+; X86-NOBMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NOBMI-NEXT:    shrl %eax
+; X86-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NOBMI-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: andnot_bitreverse_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    notl %esi
-; X64-NEXT:    bswapl %esi
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    shrl $4, %esi
-; X64-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
-; X64-NEXT:    orl %eax, %esi
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT:    shrl $2, %esi
-; X64-NEXT:    andl $858993459, %esi # imm = 0x33333333
-; X64-NEXT:    leal (%rsi,%rax,4), %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X64-NEXT:    shrl %eax
-; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: andnot_bitreverse_i32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    bswapl %eax
+; X86-BMI-NEXT:    movl %eax, %ecx
+; X86-BMI-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-BMI-NEXT:    shll $4, %ecx
+; X86-BMI-NEXT:    shrl $4, %eax
+; X86-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-BMI-NEXT:    orl %ecx, %eax
+; X86-BMI-NEXT:    movl %eax, %ecx
+; X86-BMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-BMI-NEXT:    shrl $2, %eax
+; X86-BMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-BMI-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-BMI-NEXT:    movl %eax, %ecx
+; X86-BMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-BMI-NEXT:    shrl %eax
+; X86-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-BMI-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_bitreverse_i32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NOBMI-NEXT:    bswapl %esi
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NOBMI-NEXT:    shll $4, %eax
+; X64-NOBMI-NEXT:    shrl $4, %esi
+; X64-NOBMI-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
+; X64-NOBMI-NEXT:    orl %eax, %esi
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NOBMI-NEXT:    shrl $2, %esi
+; X64-NOBMI-NEXT:    andl $858993459, %esi # imm = 0x33333333
+; X64-NOBMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-NOBMI-NEXT:    movl %eax, %ecx
+; X64-NOBMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X64-NOBMI-NEXT:    shrl %eax
+; X64-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NOBMI-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bitreverse_i32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI-NEXT:    bswapl %esi
+; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-BMI-NEXT:    shll $4, %eax
+; X64-BMI-NEXT:    shrl $4, %esi
+; X64-BMI-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
+; X64-BMI-NEXT:    orl %eax, %esi
+; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-BMI-NEXT:    shrl $2, %esi
+; X64-BMI-NEXT:    andl $858993459, %esi # imm = 0x33333333
+; X64-BMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-BMI-NEXT:    movl %eax, %ecx
+; X64-BMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X64-BMI-NEXT:    shrl %eax
+; X64-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-BMI-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-BMI-NEXT:    andnl %edi, %eax, %eax
+; X64-BMI-NEXT:    retq
   %not = xor i32 %a1, -1
   %bitrev = tail call i32 @llvm.bitreverse.i32(i32 %not)
   %and = and i32 %bitrev, %a0
@@ -529,8 +960,7 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-LABEL: andnot_bitreverse_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rolw $8, %ax
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
@@ -548,34 +978,59 @@ define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-NEXT:    shrl %eax
 ; X86-NEXT:    andl $21845, %eax # imm = 0x5555
 ; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    notl %eax
 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: andnot_bitreverse_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    notl %esi
-; X64-NEXT:    rolw $8, %si
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    andl $3855, %eax # imm = 0xF0F
-; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    shrl $4, %esi
-; X64-NEXT:    andl $3855, %esi # imm = 0xF0F
-; X64-NEXT:    orl %eax, %esi
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    andl $13107, %eax # imm = 0x3333
-; X64-NEXT:    shrl $2, %esi
-; X64-NEXT:    andl $13107, %esi # imm = 0x3333
-; X64-NEXT:    leal (%rsi,%rax,4), %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $21845, %ecx # imm = 0x5555
-; X64-NEXT:    shrl %eax
-; X64-NEXT:    andl $21845, %eax # imm = 0x5555
-; X64-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
-; X64-NEXT:    retq
+; X64-NOBMI-LABEL: andnot_bitreverse_i16:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NOBMI-NEXT:    rolw $8, %si
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X64-NOBMI-NEXT:    shll $4, %eax
+; X64-NOBMI-NEXT:    shrl $4, %esi
+; X64-NOBMI-NEXT:    andl $3855, %esi # imm = 0xF0F
+; X64-NOBMI-NEXT:    orl %eax, %esi
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    andl $13107, %eax # imm = 0x3333
+; X64-NOBMI-NEXT:    shrl $2, %esi
+; X64-NOBMI-NEXT:    andl $13107, %esi # imm = 0x3333
+; X64-NOBMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-NOBMI-NEXT:    movl %eax, %ecx
+; X64-NOBMI-NEXT:    andl $21845, %ecx # imm = 0x5555
+; X64-NOBMI-NEXT:    shrl %eax
+; X64-NOBMI-NEXT:    andl $21845, %eax # imm = 0x5555
+; X64-NOBMI-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bitreverse_i16:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI-NEXT:    rolw $8, %si
+; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X64-BMI-NEXT:    shll $4, %eax
+; X64-BMI-NEXT:    shrl $4, %esi
+; X64-BMI-NEXT:    andl $3855, %esi # imm = 0xF0F
+; X64-BMI-NEXT:    orl %eax, %esi
+; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    andl $13107, %eax # imm = 0x3333
+; X64-BMI-NEXT:    shrl $2, %esi
+; X64-BMI-NEXT:    andl $13107, %esi # imm = 0x3333
+; X64-BMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-BMI-NEXT:    movl %eax, %ecx
+; X64-BMI-NEXT:    andl $21845, %ecx # imm = 0x5555
+; X64-BMI-NEXT:    shrl %eax
+; X64-BMI-NEXT:    andl $21845, %eax # imm = 0x5555
+; X64-BMI-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-BMI-NEXT:    andnl %edi, %eax, %eax
+; X64-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-BMI-NEXT:    retq
   %not = xor i16 %a1, -1
   %bitrev = tail call i16 @llvm.bitreverse.i16(i16 %not)
   %and = and i16 %bitrev, %a0
@@ -586,7 +1041,6 @@ define i8 @andnot_bitreverse_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: andnot_bitreverse_i8:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notb %al
 ; X86-NEXT:    rolb $4, %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    andb $51, %cl
@@ -600,12 +1054,12 @@ define i8 @andnot_bitreverse_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-NEXT:    shrb %al
 ; X86-NEXT:    andb $85, %al
 ; X86-NEXT:    orb %cl, %al
+; X86-NEXT:    notb %al
 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: andnot_bitreverse_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    notb %sil
 ; X64-NEXT:    rolb $4, %sil
 ; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    andb $51, %al
@@ -619,6 +1073,7 @@ define i8 @andnot_bitreverse_i8(i8 %a0, i8 %a1) nounwind {
 ; X64-NEXT:    shrb %al
 ; X64-NEXT:    andb $85, %al
 ; X64-NEXT:    orb %cl, %al
+; X64-NEXT:    notb %al
 ; X64-NEXT:    andb %dil, %al
 ; X64-NEXT:    retq
   %not = xor i8 %a1, -1
@@ -626,8 +1081,3 @@ define i8 @andnot_bitreverse_i8(i8 %a0, i8 %a1) nounwind {
   %and = and i8 %bitrev, %a0
   ret i8 %and
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; X64-BMI: {{.*}}
-; X64-NOBMI: {{.*}}
-; X86-BMI: {{.*}}
-; X86-NOBMI: {{.*}}
diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
new file mode 100644
index 000000000000000..de0bec7ea2695a8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll
@@ -0,0 +1,237 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X86
+
+define i1 @hoeq(half %x, half %y) {
+; X64-LABEL: hoeq:
+; X64:       # %bb.0:
+; X64-NEXT:    vucomxsh %xmm1, %xmm0
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+;
+; X86-LABEL: hoeq:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT:    vucomxsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+    %1 = fcmp oeq half %x, %y
+    ret i1 %1
+}
+
+define i1 @hune(half %x, half %y) {
+; X64-LABEL: hune:
+; X64:       # %bb.0:
+; X64-NEXT:    vucomxsh %xmm1, %xmm0
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+;
+; X86-LABEL: hune:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT:    vucomxsh {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+    %1 = fcmp une half %x, %y
+    ret i1 %1
+}
+
+define i1 @hoeq_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: hoeq_mem:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT:    vucomxsh (%rsi), %xmm0
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+;
+; X86-LABEL: hoeq_mem:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT:    vucomxsh (%eax), %xmm0
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+    %x = load half, ptr %xp
+    %y = load half, ptr %yp
+    %1 = fcmp oeq half %x, %y
+    ret i1 %1
+}
+
+define i1 @hune_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: hune_mem:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X64-NEXT:    vucomxsh (%rsi), %xmm0
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+;
+; X86-LABEL: hune_mem:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; X86-NEXT:    vucomxsh (%eax), %xmm0
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+    %x = load half, ptr %xp
+    %y = load half, ptr %yp
+    %1 = fcmp une half %x, %y
+    ret i1 %1
+}
+
+define i1 @foeq(float %x, float %y) {
+; X64-LABEL: foeq:
+; X64:       # %bb.0:
+; X64-NEXT:    vucomxss %xmm1, %xmm0
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+;
+; X86-LABEL: foeq:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    vucomxss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+    %1 = fcmp oeq float %x, %y
+    ret i1 %1
+}
+
+define i1 @fune(float %x, float %y) {
+; X64-LABEL: fune:
+; X64:       # %bb.0:
+; X64-NEXT:    vucomxss %xmm1, %xmm0
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+;
+; X86-LABEL: fune:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    vucomxss {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+    %1 = fcmp une float %x, %y
+    ret i1 %1
+}
+
+define i1 @foeq_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: foeq_mem:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    vucomxss (%rsi), %xmm0
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+;
+; X86-LABEL: foeq_mem:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    vucomxss (%eax), %xmm0
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+    %x = load float, ptr %xp
+    %y = load float, ptr %yp
+    %1 = fcmp oeq float %x, %y
+    ret i1 %1
+}
+
+define i1 @fune_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: fune_mem:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-NEXT:    vucomxss (%rsi), %xmm0
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+;
+; X86-LABEL: fune_mem:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    vucomxss (%eax), %xmm0
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+    %x = load float, ptr %xp
+    %y = load float, ptr %yp
+    %1 = fcmp une float %x, %y
+    ret i1 %1
+}
+
+define i1 @doeq(double %x, double %y) {
+; X64-LABEL: doeq:
+; X64:       # %bb.0:
+; X64-NEXT:    vucomxsd %xmm1, %xmm0
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+;
+; X86-LABEL: doeq:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vucomxsd {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+    %1 = fcmp oeq double %x, %y
+    ret i1 %1
+}
+
+define i1 @dune(double %x, double %y) {
+; X64-LABEL: dune:
+; X64:       # %bb.0:
+; X64-NEXT:    vucomxsd %xmm1, %xmm0
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+;
+; X86-LABEL: dune:
+; X86:       # %bb.0:
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vucomxsd {{[0-9]+}}(%esp), %xmm0
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+    %1 = fcmp une double %x, %y
+    ret i1 %1
+}
+
+define i1 @doeq_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: doeq_mem:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT:    vucomxsd (%rsi), %xmm0
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+;
+; X86-LABEL: doeq_mem:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vucomxsd (%eax), %xmm0
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+    %x = load double, ptr %xp
+    %y = load double, ptr %yp
+    %1 = fcmp oeq double %x, %y
+    ret i1 %1
+}
+
+define i1 @dune_mem(ptr %xp, ptr %yp) {
+; X64-LABEL: dune_mem:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT:    vucomxsd (%rsi), %xmm0
+; X64-NEXT:    setne %al
+; X64-NEXT:    retq
+;
+; X86-LABEL: dune_mem:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vucomxsd (%eax), %xmm0
+; X86-NEXT:    setne %al
+; X86-NEXT:    retl
+    %x = load double, ptr %xp
+    %y = load double, ptr %yp
+    %1 = fcmp une double %x, %y
+    ret i1 %1
+}
diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
index 7b81d547db085c5..5f2bcf0556b021e 100644
--- a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll
@@ -76,13 +76,15 @@ declare <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat>, i32)
 define i32 @test_int_x86_avx512_fpclass_nepbf16_512(<32 x bfloat> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_512:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vfpclasspbf16 $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x48,0x66,0xc8,0x02]
-; CHECK-NEXT:    vfpclasspbf16 $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x66,0xc0,0x04]
+; CHECK-NEXT:    vfpclasspbf16 $6, %zmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x48,0x66,0xc8,0x06]
+; CHECK-NEXT:    # k1 = isPositiveZero(zmm0) | isNegativeZero(zmm0)
+; CHECK-NEXT:    vfpclasspbf16 $0, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x66,0xc0,0x00]
+; CHECK-NEXT:    # k0 {%k1} = false
 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 4)
-  %res1 = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 2)
+  %res = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 0)
+  %res1 = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 6)
   %1 = and <32 x i1> %res1, %res
   %2 = bitcast <32 x i1> %1 to i32
   ret i32 %2
diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
index e0f5679e8ac96dd..c97d27ff324bbbf 100644
--- a/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
+++ b/llvm/test/CodeGen/X86/avx10_2bf16-arith.ll
@@ -1166,3 +1166,25 @@ entry:
   %2 = select <8 x i1> %1, <8 x bfloat> %0, <8 x bfloat> zeroinitializer
   ret <8 x bfloat> %2
 }
+
+define <32 x bfloat> @addv(<32 x bfloat> %a, <32 x bfloat> %b) nounwind {
+; X64-LABEL: addv:
+; X64:       # %bb.0:
+; X64-NEXT:    vaddnepbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2]
+; X64-NEXT:    vaddnepbf16 %ymm3, %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0xcb]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: addv:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp # encoding: [0x55]
+; X86-NEXT:    movl %esp, %ebp # encoding: [0x89,0xe5]
+; X86-NEXT:    andl $-32, %esp # encoding: [0x83,0xe4,0xe0]
+; X86-NEXT:    subl $32, %esp # encoding: [0x83,0xec,0x20]
+; X86-NEXT:    vaddnepbf16 %ymm2, %ymm0, %ymm0 # encoding: [0x62,0xf5,0x7d,0x28,0x58,0xc2]
+; X86-NEXT:    vaddnepbf16 8(%ebp), %ymm1, %ymm1 # encoding: [0x62,0xf5,0x75,0x28,0x58,0x8d,0x08,0x00,0x00,0x00]
+; X86-NEXT:    movl %ebp, %esp # encoding: [0x89,0xec]
+; X86-NEXT:    popl %ebp # encoding: [0x5d]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %add = fadd <32 x bfloat> %a, %b
+  ret <32 x bfloat> %add
+}
diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
index 559d866b55cc7b6..59151d4dd96099e 100644
--- a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll
@@ -298,6 +298,7 @@ define i8 @test_int_x86_avx512_fpclass_nepbf16_128(<8 x bfloat> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclasspbf16 $2, %xmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x08,0x66,0xc8,0x02]
+; CHECK-NEXT:    # k1 = isPositiveZero(xmm0)
 ; CHECK-NEXT:    vfpclasspbf16 $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x66,0xc0,0x04]
 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
@@ -313,6 +314,7 @@ define i16 @test_int_x86_avx512_fpclass_nepbf16_256(<16 x bfloat> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclasspbf16 $2, %ymm0, %k1 # encoding: [0x62,0xf3,0x7f,0x28,0x66,0xc8,0x02]
+; CHECK-NEXT:    # k1 = isPositiveZero(ymm0)
 ; CHECK-NEXT:    vfpclasspbf16 $4, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x66,0xc0,0x04]
 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll
index 64063bdf8333eb5..53193597d62f08f 100644
--- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll
@@ -7,7 +7,7 @@
 define zeroext i8 @test_mm512_mask_fpclass_pd_mask(i8 zeroext %__U, <8 x double> %__A) {
 ; X86-LABEL: test_mm512_mask_fpclass_pd_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    vfpclasspd $4, %zmm0, %k0
+; X86-NEXT:    vfpclasspd $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0)
 ; X86-NEXT:    kmovw %k0, %eax
 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
@@ -16,7 +16,7 @@ define zeroext i8 @test_mm512_mask_fpclass_pd_mask(i8 zeroext %__U, <8 x double>
 ;
 ; X64-LABEL: test_mm512_mask_fpclass_pd_mask:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vfpclasspd $4, %zmm0, %k0
+; X64-NEXT:    vfpclasspd $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0)
 ; X64-NEXT:    kmovw %k0, %eax
 ; X64-NEXT:    andb %dil, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
@@ -35,7 +35,7 @@ declare <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double>, i32)
 define zeroext i8 @test_mm512_fpclass_pd_mask(<8 x double> %__A) {
 ; CHECK-LABEL: test_mm512_fpclass_pd_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0
+; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0)
 ; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    vzeroupper
@@ -49,7 +49,7 @@ entry:
 define zeroext i16 @test_mm512_mask_fpclass_ps_mask(i16 zeroext %__U, <16 x float> %__A) {
 ; X86-LABEL: test_mm512_mask_fpclass_ps_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    vfpclassps $4, %zmm0, %k0
+; X86-NEXT:    vfpclassps $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0)
 ; X86-NEXT:    kmovw %k0, %eax
 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -58,7 +58,7 @@ define zeroext i16 @test_mm512_mask_fpclass_ps_mask(i16 zeroext %__U, <16 x floa
 ;
 ; X64-LABEL: test_mm512_mask_fpclass_ps_mask:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vfpclassps $4, %zmm0, %k0
+; X64-NEXT:    vfpclassps $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0)
 ; X64-NEXT:    kmovw %k0, %eax
 ; X64-NEXT:    andl %edi, %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
@@ -77,7 +77,7 @@ declare <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float>, i32)
 define zeroext i16 @test_mm512_fpclass_ps_mask(<16 x float> %__A) {
 ; CHECK-LABEL: test_mm512_fpclass_ps_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0
+; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0)
 ; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    vzeroupper
@@ -91,7 +91,7 @@ entry:
 define zeroext i8 @test_mm_fpclass_sd_mask(<4 x float> %__A) {
 ; CHECK-LABEL: test_mm_fpclass_sd_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vfpclasssd $2, %xmm0, %k0
+; CHECK-NEXT:    vfpclasssd $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0)
 ; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    ret{{[l|q]}}
@@ -107,7 +107,7 @@ define zeroext i8 @test_mm_mask_fpclass_sd_mask(i8 zeroext %__U, <4 x float> %__
 ; X86-LABEL: test_mm_mask_fpclass_sd_mask:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
-; X86-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1}
+; X86-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1} # k0 {%k1} = isPositiveZero(xmm0)
 ; X86-NEXT:    kmovw %k0, %eax
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
@@ -115,7 +115,7 @@ define zeroext i8 @test_mm_mask_fpclass_sd_mask(i8 zeroext %__U, <4 x float> %__
 ; X64-LABEL: test_mm_mask_fpclass_sd_mask:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    kmovw %edi, %k1
-; X64-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1}
+; X64-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1} # k0 {%k1} = isPositiveZero(xmm0)
 ; X64-NEXT:    kmovw %k0, %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
@@ -128,7 +128,7 @@ entry:
 define zeroext i8 @test_mm_fpclass_ss_mask(<4 x float> %__A) {
 ; CHECK-LABEL: test_mm_fpclass_ss_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vfpclassss $2, %xmm0, %k0
+; CHECK-NEXT:    vfpclassss $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0)
 ; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    ret{{[l|q]}}
@@ -143,7 +143,7 @@ define zeroext i8 @test_mm_mask_fpclass_ss_mask(i8 zeroext %__U, <4 x float> %__
 ; X86-LABEL: test_mm_mask_fpclass_ss_mask:
 ; X86:       # %bb.0: # %entry
 ; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
-; X86-NEXT:    vfpclassss $2, %xmm0, %k0 {%k1}
+; X86-NEXT:    vfpclassss $2, %xmm0, %k0 {%k1} # k0 {%k1} = isPositiveZero(xmm0)
 ; X86-NEXT:    kmovw %k0, %eax
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
@@ -151,7 +151,7 @@ define zeroext i8 @test_mm_mask_fpclass_ss_mask(i8 zeroext %__U, <4 x float> %__
 ; X64-LABEL: test_mm_mask_fpclass_ss_mask:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    kmovw %edi, %k1
-; X64-NEXT:    vfpclassss $2, %xmm0, %k0 {%k1}
+; X64-NEXT:    vfpclassss $2, %xmm0, %k0 {%k1} # k0 {%k1} = isPositiveZero(xmm0)
 ; X64-NEXT:    kmovw %k0, %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
index 23e929aa9d89b1c..8a0428d022b6d7f 100644
--- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
@@ -654,7 +654,9 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02]
+; CHECK-NEXT:    # k1 = isPositiveZero(zmm0)
 ; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04]
+; CHECK-NEXT:    # k0 {%k1} = isNegativeZero(zmm0)
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
@@ -669,7 +671,9 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02]
+; CHECK-NEXT:    # k1 = isPositiveZero(zmm0)
 ; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04]
+; CHECK-NEXT:    # k0 {%k1} = isNegativeZero(zmm0)
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
index 027bca9c8badf9c..70f60c802a2d521 100644
--- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
@@ -726,7 +726,9 @@ define i8 @test_int_x86_avx512_fpclass_pd_512(<8 x double> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_512:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02]
+; CHECK-NEXT:    # k1 = isPositiveZero(zmm0)
 ; CHECK-NEXT:    vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04]
+; CHECK-NEXT:    # k0 {%k1} = isNegativeZero(zmm0)
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
@@ -743,7 +745,9 @@ define i16@test_int_x86_avx512_fpclass_ps_512(<16 x float> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_512:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02]
+; CHECK-NEXT:    # k1 = isPositiveZero(zmm0)
 ; CHECK-NEXT:    vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04]
+; CHECK-NEXT:    # k0 {%k1} = isNegativeZero(zmm0)
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
@@ -761,7 +765,9 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sd:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclasssd $4, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0xc8,0x04]
+; CHECK-NEXT:    # k1 = isNegativeZero(xmm0)
 ; CHECK-NEXT:    vfpclasssd $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x67,0xc0,0x02]
+; CHECK-NEXT:    # k0 {%k1} = isPositiveZero(xmm0)
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
@@ -775,6 +781,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd_load(ptr %x0ptr) {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 ; X86-NEXT:    vfpclasssd $4, (%eax), %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0x00,0x04]
+; X86-NEXT:    # k0 = isNegativeZero(mem)
 ; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl # encoding: [0xc3]
@@ -782,6 +789,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd_load(ptr %x0ptr) {
 ; X64-LABEL: test_int_x86_avx512_mask_fpclass_sd_load:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vfpclasssd $4, (%rdi), %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0x07,0x04]
+; X64-NEXT:    # k0 = isNegativeZero(mem)
 ; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq # encoding: [0xc3]
@@ -796,7 +804,9 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ss:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclassss $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0xc8,0x04]
+; CHECK-NEXT:    # k1 = isNegativeZero(xmm0)
 ; CHECK-NEXT:    vfpclassss $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x67,0xc0,0x02]
+; CHECK-NEXT:    # k0 {%k1} = isPositiveZero(xmm0)
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
@@ -810,6 +820,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss_load(ptr %x0ptr, i8 %x1) {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
 ; X86-NEXT:    vfpclassss $4, (%eax), %k0 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0x00,0x04]
+; X86-NEXT:    # k0 = isNegativeZero(mem)
 ; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl # encoding: [0xc3]
@@ -817,6 +828,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss_load(ptr %x0ptr, i8 %x1) {
 ; X64-LABEL: test_int_x86_avx512_mask_fpclass_ss_load:
 ; X64:       # %bb.0:
 ; X64-NEXT:    vfpclassss $4, (%rdi), %k0 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0x07,0x04]
+; X64-NEXT:    # k0 = isNegativeZero(mem)
 ; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq # encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll
index 703591acef57207..a8a38d9c4811331 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll
@@ -235,7 +235,7 @@ declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32)
 define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) {
 ; CHECK-LABEL: test_mm_fpclass_pd_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k0
+; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0)
 ; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    ret{{[l|q]}}
@@ -279,7 +279,7 @@ declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32)
 define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) {
 ; CHECK-LABEL: test_mm256_fpclass_pd_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0
+; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0)
 ; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    vzeroupper
@@ -322,7 +322,7 @@ declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32)
 define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) {
 ; CHECK-LABEL: test_mm_fpclass_ps_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0
+; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0)
 ; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    ret{{[l|q]}}
@@ -336,7 +336,7 @@ entry:
 define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float> %__A) {
 ; X86-LABEL: test_mm256_mask_fpclass_ps_mask:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    vfpclassps $2, %ymm0, %k0
+; X86-NEXT:    vfpclassps $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0)
 ; X86-NEXT:    kmovw %k0, %eax
 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
@@ -345,7 +345,7 @@ define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float>
 ;
 ; X64-LABEL: test_mm256_mask_fpclass_ps_mask:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    vfpclassps $2, %ymm0, %k0
+; X64-NEXT:    vfpclassps $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0)
 ; X64-NEXT:    kmovw %k0, %eax
 ; X64-NEXT:    andb %dil, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
@@ -364,7 +364,7 @@ declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32)
 define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) {
 ; CHECK-LABEL: test_mm256_fpclass_ps_mask:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0
+; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0)
 ; CHECK-NEXT:    kmovw %k0, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
index 8d609eb7fdd0092..f31dafcd6862695 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll
@@ -2921,6 +2921,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
+; CHECK-NEXT:    # k1 = isNegativeZero(xmm0)
 ; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
@@ -2936,6 +2937,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
+; CHECK-NEXT:    # k1 = isNegativeZero(ymm0)
 ; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
@@ -2952,6 +2954,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
+; CHECK-NEXT:    # k1 = isPositiveZero(xmm0)
 ; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
@@ -2967,6 +2970,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
+; CHECK-NEXT:    # k1 = isNegativeZero(ymm0)
 ; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
index 3b9f96ef452c323..ec94b593148dfac 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
@@ -1500,6 +1500,7 @@ define i8 @test_int_x86_avx512_fpclass_ps_128(<4 x float> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04]
+; CHECK-NEXT:    # k1 = isNegativeZero(xmm0)
 ; CHECK-NEXT:    vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02]
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
@@ -1518,6 +1519,7 @@ define i8 @test_int_x86_avx512_fpclass_ps_256(<8 x float> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04]
+; CHECK-NEXT:    # k1 = isNegativeZero(ymm0)
 ; CHECK-NEXT:    vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02]
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
@@ -1536,6 +1538,7 @@ define i8 @test_int_x86_avx512_fpclass_pd_128(<2 x double> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02]
+; CHECK-NEXT:    # k1 = isPositiveZero(xmm0)
 ; CHECK-NEXT:    vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04]
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
@@ -1554,6 +1557,7 @@ define i8 @test_int_x86_avx512_fpclass_pd_256(<4 x double> %x0) {
 ; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_256:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04]
+; CHECK-NEXT:    # k1 = isNegativeZero(ymm0)
 ; CHECK-NEXT:    vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02]
 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
diff --git a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
index b311c8831457b80..ef87ac31fcf48cb 100644
--- a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll
@@ -215,3 +215,117 @@ define <8 x bfloat> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) {
 }
 declare <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A)
 
+define <8 x bfloat> @select(i8 %x, <8 x bfloat> %y) nounwind {
+; X64-LABEL: select:
+; X64:       # %bb.0:
+; X64-NEXT:    vmovaps %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x28,0xc8]
+; X64-NEXT:    movb %dil, %al # encoding: [0x40,0x88,0xf8]
+; X64-NEXT:    movb %al, -{{[0-9]+}}(%rsp) # encoding: [0x88,0x44,0x24,0xff]
+; X64-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0xff]
+; X64-NEXT:    movl %eax, %ecx # encoding: [0x89,0xc1]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
+; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT:    shrb %cl # encoding: [0xd0,0xe9]
+; X64-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
+; X64-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT:    shrb $2, %cl # encoding: [0xc0,0xe9,0x02]
+; X64-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
+; X64-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT:    shrb $3, %cl # encoding: [0xc0,0xe9,0x03]
+; X64-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
+; X64-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT:    shrb $4, %cl # encoding: [0xc0,0xe9,0x04]
+; X64-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
+; X64-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT:    shrb $5, %cl # encoding: [0xc0,0xe9,0x05]
+; X64-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
+; X64-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X64-NEXT:    shrb $6, %cl # encoding: [0xc0,0xe9,0x06]
+; X64-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X64-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X64-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X64-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
+; X64-NEXT:    shrb $7, %al # encoding: [0xc0,0xe8,0x07]
+; X64-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; X64-NEXT:    negl %eax # encoding: [0xf7,0xd8]
+; X64-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
+; X64-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdf,0xc1]
+; X64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: select:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %eax # encoding: [0x50]
+; X86-NEXT:    vmovaps %xmm0, %xmm1 # encoding: [0xc5,0xf8,0x28,0xc8]
+; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x08]
+; X86-NEXT:    movb %al, {{[0-9]+}}(%esp) # encoding: [0x88,0x44,0x24,0x03]
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x03]
+; X86-NEXT:    movl %eax, %ecx # encoding: [0x89,0xc1]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1]
+; X86-NEXT:    # kill: def $al killed $al killed $eax
+; X86-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT:    shrb %cl # encoding: [0xd0,0xe9]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
+; X86-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT:    shrb $2, %cl # encoding: [0xc0,0xe9,0x02]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
+; X86-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT:    shrb $3, %cl # encoding: [0xc0,0xe9,0x03]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03]
+; X86-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT:    shrb $4, %cl # encoding: [0xc0,0xe9,0x04]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
+; X86-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT:    shrb $5, %cl # encoding: [0xc0,0xe9,0x05]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
+; X86-NEXT:    movb %al, %cl # encoding: [0x88,0xc1]
+; X86-NEXT:    shrb $6, %cl # encoding: [0xc0,0xe9,0x06]
+; X86-NEXT:    movzbl %cl, %ecx # encoding: [0x0f,0xb6,0xc9]
+; X86-NEXT:    andl $1, %ecx # encoding: [0x83,0xe1,0x01]
+; X86-NEXT:    negl %ecx # encoding: [0xf7,0xd9]
+; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
+; X86-NEXT:    shrb $7, %al # encoding: [0xc0,0xe8,0x07]
+; X86-NEXT:    movzbl %al, %eax # encoding: [0x0f,0xb6,0xc0]
+; X86-NEXT:    negl %eax # encoding: [0xf7,0xd8]
+; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
+; X86-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdf,0xc1]
+; X86-NEXT:    popl %eax # encoding: [0x58]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %1 = bitcast i8 %x to <8 x i1>
+  %2 = select <8 x i1> %1, <8 x bfloat> zeroinitializer, <8 x bfloat> %y
+  ret <8 x bfloat> %2
+}
diff --git a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
index 73fe4f6ffedb0e1..fca5aa046b03b95 100644
--- a/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
+++ b/llvm/test/CodeGen/X86/basic-block-address-map-pgo-features.ll
@@ -1,14 +1,20 @@
 ; Check the basic block sections labels option
-; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,BASIC
+; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map | FileCheck %s --check-prefixes=CHECK,BASIC,PGO-NONE
+; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=none | FileCheck %s --check-prefixes=CHECK,BASIC,PGO-NONE
 
 ;; Also verify this holds for all PGO features enabled
 ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count,bb-freq,br-prob | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP
+; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=all | FileCheck %s --check-prefixes=CHECK,PGO-ALL,PGO-FEC,PGO-BBF,PGO-BRP
 
 ;; Also verify that pgo extension only includes the enabled feature
 ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=func-entry-count | FileCheck %s --check-prefixes=CHECK,PGO-FEC,FEC-ONLY
 ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=bb-freq | FileCheck %s --check-prefixes=CHECK,PGO-BBF,BBF-ONLY
 ; RUN: llc < %s -mtriple=x86_64 -function-sections -unique-section-names=true -basic-block-address-map -pgo-analysis-map=br-prob | FileCheck %s --check-prefixes=CHECK,PGO-BRP,BRP-ONLY
 
+;; Verify that we emit an error if we try to specify values in addition to all/none
+; RUN: not llc < %s -mtriple=x86_64 -basic-block-address-map -pgo-analysis-map=all,bb-freq
+; RUN: not llc < %s -mtriple=x86_64 -basic-block-address-map -pgo-analysis-map=none,bb-freq
+
 
 define void @_Z3bazb(i1 zeroext, i1 zeroext) personality ptr @__gxx_personality_v0 !prof !0  {
   br i1 %0, label %3, label %8, !prof !1
@@ -93,6 +99,9 @@ declare i32 @__gxx_personality_v0(...)
 ; CHECK-NEXT:	.byte	4
 
 ;; PGO Analysis Map
+; PGO-NONE-NOT: .byte	100		# function entry count
+; PGO-NONE-NOT: .ascii	"\271\235\376\332\245\200\356\017"	# basic block frequency
+; PGO-NONE-NOT: .byte	2		# basic block successor count
 ; PGO-FEC-NEXT:	.byte	100		# function entry count
 ; PGO-BBF-NEXT:	.ascii	"\271\235\376\332\245\200\356\017"	# basic block frequency
 ; PGO-BRP-NEXT:	.byte	2		# basic block successor count
diff --git a/llvm/test/CodeGen/X86/bfloat-constrained.ll b/llvm/test/CodeGen/X86/bfloat-constrained.ll
index 0a8c4f20648b059..081b1cebfc43d62 100644
--- a/llvm/test/CodeGen/X86/bfloat-constrained.ll
+++ b/llvm/test/CodeGen/X86/bfloat-constrained.ll
@@ -86,7 +86,7 @@ define void @float_to_bfloat(float %0) strictfp {
 ; X64-NEXT:    popq %rax
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
-  %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bfloat.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bf16.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   store bfloat %2, ptr @a, align 2
   ret void
 }
@@ -115,7 +115,7 @@ define void @double_to_bfloat(double %0) strictfp {
 ; X64-NEXT:    popq %rax
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
-  %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bfloat.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %2 = tail call bfloat @llvm.experimental.constrained.fptrunc.bf16.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   store bfloat %2, ptr @a, align 2
   ret void
 }
@@ -162,20 +162,20 @@ define void @add() strictfp {
 ; X64-NEXT:    .cfi_def_cfa_offset 8
 ; X64-NEXT:    retq
   %1 = load bfloat, ptr @a, align 2
-  %2 = tail call float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat %1, metadata !"fpexcept.strict") #0
+  %2 = tail call float @llvm.experimental.constrained.fpext.f32.bf16(bfloat %1, metadata !"fpexcept.strict") #0
   %3 = load bfloat, ptr @b, align 2
-  %4 = tail call float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat %3, metadata !"fpexcept.strict") #0
+  %4 = tail call float @llvm.experimental.constrained.fpext.f32.bf16(bfloat %3, metadata !"fpexcept.strict") #0
   %5 = tail call float @llvm.experimental.constrained.fadd.f32(float %2, float %4, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
-  %6 = tail call bfloat @llvm.experimental.constrained.fptrunc.bfloat.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  %6 = tail call bfloat @llvm.experimental.constrained.fptrunc.bf16.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
   store bfloat %6, ptr @c, align 2
   ret void
 }
 
-declare float @llvm.experimental.constrained.fpext.f32.bfloat(bfloat, metadata)
-declare double @llvm.experimental.constrained.fpext.f64.bfloat(bfloat, metadata)
+declare float @llvm.experimental.constrained.fpext.f32.bf16(bfloat, metadata)
+declare double @llvm.experimental.constrained.fpext.f64.bf16(bfloat, metadata)
 declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
-declare bfloat @llvm.experimental.constrained.fptrunc.bfloat.f32(float, metadata, metadata)
-declare bfloat @llvm.experimental.constrained.fptrunc.bfloat.f64(double, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.fptrunc.bf16.f32(float, metadata, metadata)
+declare bfloat @llvm.experimental.constrained.fptrunc.bf16.f64(double, metadata, metadata)
 
 attributes #0 = { strictfp }
 
diff --git a/llvm/test/CodeGen/X86/float-strict-powi-convert.ll b/llvm/test/CodeGen/X86/float-strict-powi-convert.ll
index 4d0cffc53d93af7..b39f5ec667cecda 100644
--- a/llvm/test/CodeGen/X86/float-strict-powi-convert.ll
+++ b/llvm/test/CodeGen/X86/float-strict-powi-convert.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -mtriple=x86_64-pc-windows-msvc %s -o - | FileCheck %s -check-prefix=WIN
 ; RUN: llc -mtriple=x86_64-pc-linux %s -o -| FileCheck %s -check-prefix=UNIX
 
-declare float @llvm.experimental.constrained.powi.f32.i32(float, i32, metadata, metadata)
+declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, metadata)
 
 define float @powi_f64(float %a, i32 %b) nounwind strictfp {
 ; WIN-LABEL: powi_f64:
@@ -19,6 +19,6 @@ define float @powi_f64(float %a, i32 %b) nounwind strictfp {
 ; UNIX-NEXT:    callq __powisf2@PLT
 ; UNIX-NEXT:    popq %rax
 ; UNIX-NEXT:    retq
-  %1 = call float @llvm.experimental.constrained.powi.f32.i32(float %a, i32 %b, metadata !"round.tonearest", metadata !"fpexcept.ignore") strictfp
+  %1 = call float @llvm.experimental.constrained.powi.f32(float %a, i32 %b, metadata !"round.tonearest", metadata !"fpexcept.ignore") strictfp
   ret float %1
 }
diff --git a/llvm/test/CodeGen/X86/scmp.ll b/llvm/test/CodeGen/X86/scmp.ll
index 0746a07d2cdf268..5ae5caf3e88b200 100644
--- a/llvm/test/CodeGen/X86/scmp.ll
+++ b/llvm/test/CodeGen/X86/scmp.ll
@@ -848,7 +848,7 @@ define <16 x i32> @scmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; AVX512-NEXT:    vpcmpgtb %xmm0, %xmm1, %k1
 ; AVX512-NEXT:    vpcmpgtb %xmm1, %xmm0, %k2
 ; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX512-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512-NEXT:    vpternlogd {{.*#+}} zmm1 = -1
 ; AVX512-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
 ; AVX512-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
new file mode 100644
index 000000000000000..825a11d66cd4523
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+
+define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4key4 %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+  ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4key4 %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7e,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+  ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4key4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4key4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+  ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+
+define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4rnds4 %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+  ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4rnds4 %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7f,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+  ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4rnds4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+  ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll
index ed7109c416e7fda..9382ba31ab649d6 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll
@@ -234,6 +234,7 @@ define i32 @stack_fold_fpclassph(<32 x half> %a0) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    vfpclassphz $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 64-byte Folded Reload
+; CHECK-NEXT:    # k0 = isNegativeZero(mem)
 ; CHECK-NEXT:    kmovd %k0, %eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
@@ -253,6 +254,7 @@ define i32 @stack_fold_fpclassph_mask(<32 x half> %a0, ptr %p) {
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    kmovd (%rdi), %k1
 ; CHECK-NEXT:    vfpclassphz $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 {%k1} # 64-byte Folded Reload
+; CHECK-NEXT:    # k0 {%k1} = isNegativeZero(mem)
 ; CHECK-NEXT:    kmovd %k0, %eax
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
@@ -272,6 +274,7 @@ define i8 @stack_fold_fpclasssh(<8 x half> %a0) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    vfpclasssh $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 16-byte Folded Reload
+; CHECK-NEXT:    # k0 = isNegativeZero(mem)
 ; CHECK-NEXT:    kmovd %k0, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
@@ -290,6 +293,7 @@ define i8 @stack_fold_fpclasssh_mask(<8 x half> %a0, ptr %p) {
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    kmovb (%rdi), %k1
 ; CHECK-NEXT:    vfpclasssh $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 {%k1} # 16-byte Folded Reload
+; CHECK-NEXT:    # k0 {%k1} = isNegativeZero(mem)
 ; CHECK-NEXT:    kmovd %k0, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
index e2ed997783f59b8..3386f4a9b519813 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll
@@ -110,6 +110,7 @@ define i8 @stack_fold_fpclassph(<8 x half> %a0) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    vfpclassphx $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 16-byte Folded Reload
+; CHECK-NEXT:    # k0 = isNegativeZero(mem)
 ; CHECK-NEXT:    kmovd %k0, %eax
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
@@ -148,6 +149,7 @@ define i16 @stack_fold_fpclassph_ymm(<16 x half> %a0) {
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    vfpclassphy $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 32-byte Folded Reload
+; CHECK-NEXT:    # k0 = isNegativeZero(mem)
 ; CHECK-NEXT:    kmovd %k0, %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/ucmp.ll b/llvm/test/CodeGen/X86/ucmp.ll
index cd643cb8d637519..6a52acfe2fb3059 100644
--- a/llvm/test/CodeGen/X86/ucmp.ll
+++ b/llvm/test/CodeGen/X86/ucmp.ll
@@ -819,7 +819,7 @@ define <16 x i32> @ucmp_wide_vec_result(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; AVX512-NEXT:    vpcmpltub %xmm1, %xmm0, %k1
 ; AVX512-NEXT:    vpcmpnleub %xmm1, %xmm0, %k2
 ; AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm0 {%k2} {z} = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX512-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1
+; AVX512-NEXT:    vpternlogd {{.*#+}} zmm1 = -1
 ; AVX512-NEXT:    vmovdqa32 %zmm1, %zmm0 {%k1}
 ; AVX512-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir
index ff3d9ca378dbd52..135b14d6836a090 100644
--- a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir
+++ b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=x86_64-- -passes machinelicm -mcpu=skx -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -passes early-machinelicm -mcpu=skx -verify-machineinstrs -o - %s | FileCheck %s
 --- |
   @x = dso_local global i32 0, align 4
   @z = dso_local local_unnamed_addr global [1024 x i32] zeroinitializer, align 16
diff --git a/llvm/test/CodeGen/X86/vector-compress.ll b/llvm/test/CodeGen/X86/vector-compress.ll
index 2b963ab896cc9e1..f8c076db65de949 100644
--- a/llvm/test/CodeGen/X86/vector-compress.ll
+++ b/llvm/test/CodeGen/X86/vector-compress.ll
@@ -1211,3 +1211,59 @@ define <3 x i3> @test_compress_narrow_illegal_element_type(<3 x i3> %vec, <3 x i
     %out = call <3 x i3> @llvm.experimental.vector.compress(<3 x i3> %vec, <3 x i1> %mask, <3 x i3> undef)
     ret <3 x i3> %out
 }
+
+define <4 x i32> @test_compress_v4i32_zero_passthru(<4 x i32> %vec, <4 x i1> %mask) {
+; AVX2-LABEL: test_compress_v4i32_zero_passthru:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT:    vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX2-NEXT:    vmovaps %xmm2, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
+; AVX2-NEXT:    vmovd %xmm1, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    vextractps $1, %xmm0, -24(%rsp,%rax,4)
+; AVX2-NEXT:    vpextrd $1, %xmm1, %ecx
+; AVX2-NEXT:    andl $1, %ecx
+; AVX2-NEXT:    addq %rax, %rcx
+; AVX2-NEXT:    vextractps $2, %xmm0, -24(%rsp,%rcx,4)
+; AVX2-NEXT:    vpextrd $2, %xmm1, %eax
+; AVX2-NEXT:    andl $1, %eax
+; AVX2-NEXT:    addq %rcx, %rax
+; AVX2-NEXT:    vpextrd $3, %xmm1, %ecx
+; AVX2-NEXT:    andl $1, %ecx
+; AVX2-NEXT:    addq %rax, %rcx
+; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax def $rax
+; AVX2-NEXT:    andl $3, %eax
+; AVX2-NEXT:    vextractps $3, %xmm0, -24(%rsp,%rax,4)
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    cmpq $3, %rcx
+; AVX2-NEXT:    movl $3, %edx
+; AVX2-NEXT:    cmovbq %rcx, %rdx
+; AVX2-NEXT:    vextractps $3, %xmm0, %ecx
+; AVX2-NEXT:    cmovbel %eax, %ecx
+; AVX2-NEXT:    movl %ecx, -24(%rsp,%rdx,4)
+; AVX2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: test_compress_v4i32_zero_passthru:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpslld $31, %xmm1, %xmm1
+; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0
+; AVX512F-NEXT:    kshiftlw $12, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $12, %k0, %k1
+; AVX512F-NEXT:    vpcompressd %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: test_compress_v4i32_zero_passthru:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpslld $31, %xmm1, %xmm1
+; AVX512VL-NEXT:    vptestmd %xmm1, %xmm1, %k1
+; AVX512VL-NEXT:    vpcompressd %xmm0, %xmm0 {%k1} {z}
+; AVX512VL-NEXT:    retq
+    %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> zeroinitializer)
+    ret <4 x i32> %out
+}
diff --git a/llvm/test/DebugInfo/Generic/debug-names-accel-table-types.ll b/llvm/test/DebugInfo/Generic/debug-names-accel-table-types.ll
index e88afe1b4c51189..2a6c89dcab5978b 100644
--- a/llvm/test/DebugInfo/Generic/debug-names-accel-table-types.ll
+++ b/llvm/test/DebugInfo/Generic/debug-names-accel-table-types.ll
@@ -1,3 +1,4 @@
+; XFAIL: target={{.*}}-aix{{.*}}, target={{.*}}-zos{{.*}}
 ; RUN: %llc_dwarf -debugger-tune=lldb -accel-tables=Dwarf -filetype=obj -o %t < %s
 ; RUN: llvm-dwarfdump %t | FileCheck %s
 ; RUN: llvm-dwarfdump -debug-names %t | FileCheck --check-prefix=SAME-NAME %s
diff --git a/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll b/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll
index 0e8f92cacf66d73..42560fc3958d1b6 100644
--- a/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll
+++ b/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll
@@ -1,5 +1,5 @@
-; RUN: opt -passes=ipsccp %s -S -o - | FileCheck %s
-; RUN: opt --try-experimental-debuginfo-iterators -passes=ipsccp %s -S -o - | FileCheck %s
+; RUN: opt -passes=ipsccp -funcspec-for-literal-constant=false %s -S -o - | FileCheck %s
+; RUN: opt --try-experimental-debuginfo-iterators -passes=ipsccp -funcspec-for-literal-constant=false %s -S -o - | FileCheck %s
 
 ;; Check the dbg.assign DIAssignID operand gets remapped after cloning.
 
diff --git a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir
index d4d59e14724ebe7..b65a0e71af1dd2d 100644
--- a/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir
+++ b/llvm/test/DebugInfo/MIR/X86/mlicm-hoist-pre-regalloc.mir
@@ -1,6 +1,6 @@
 --- | 
-  ; RUN: llc -run-pass=machinelicm -o - %s | FileCheck %s
-  ; RUN: llc -passes=machinelicm -o - %s | FileCheck %s
+  ; RUN: llc -run-pass=early-machinelicm -o - %s | FileCheck %s
+  ; RUN: llc -passes=early-machinelicm -o - %s | FileCheck %s
   ; Line numbers should not be retained when loop invariant instructions are hoisted.
   ; Doing so causes poor stepping bevavior.
   ;
diff --git a/llvm/test/Instrumentation/RealtimeSanitizer/rtsan_unsafe.ll b/llvm/test/Instrumentation/RealtimeSanitizer/rtsan_blocking.ll
similarity index 87%
rename from llvm/test/Instrumentation/RealtimeSanitizer/rtsan_unsafe.ll
rename to llvm/test/Instrumentation/RealtimeSanitizer/rtsan_blocking.ll
index 5abf5de3044816e..80eb28c3923c2de 100644
--- a/llvm/test/Instrumentation/RealtimeSanitizer/rtsan_unsafe.ll
+++ b/llvm/test/Instrumentation/RealtimeSanitizer/rtsan_blocking.ll
@@ -25,7 +25,7 @@ define noundef i32 @main() #2 {
   ret i32 0
 }
 
-attributes #0 = { mustprogress noinline sanitize_realtime_unsafe optnone ssp uwtable(sync) }
+attributes #0 = { mustprogress noinline sanitize_realtime_blocking optnone ssp uwtable(sync) }
 ;.
-; CHECK: attributes #[[ATTR0]] = { mustprogress noinline optnone sanitize_realtime_unsafe ssp uwtable(sync) }
+; CHECK: attributes #[[ATTR0]] = { mustprogress noinline optnone sanitize_realtime_blocking ssp uwtable(sync) }
 ;.
diff --git a/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
new file mode 100644
index 000000000000000..cf8d216581240ad
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/fmmla-diagnostics.s
@@ -0,0 +1,46 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+f8f16mm,+f8f32mm  2>&1 < %s| FileCheck %s
+
+fmmla v0.4h, v1.16b, v2.16b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4h, v1.16b, v2.16b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.8s, v1.16b, v2.16b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
+// CHECK-NEXT: fmmla v0.8s, v1.16b, v2.16b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4s, v1.4s, v2.4s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4s, v1.4s, v2.4s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.8h, v1.8h, v2.8h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.8h, v1.8h, v2.8h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.16b, v1.16b, v2.16b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.16b, v1.16b, v2.16b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.d, v1.16b, v2.16b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.d, v1.16b, v2.16b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.2d, v1.16b, v2.16b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.2d, v1.16b, v2.16b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.8h, v1.8b, v2.8b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.8h, v1.8b, v2.8b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla v0.4s, v1.8b, v2.8b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fmmla v0.4s, v1.8b, v2.8b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/FP8/fmmla.s b/llvm/test/MC/AArch64/FP8/fmmla.s
new file mode 100644
index 000000000000000..922f4c9d918ce98
--- /dev/null
+++ b/llvm/test/MC/AArch64/FP8/fmmla.s
@@ -0,0 +1,25 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f8f16mm,+f8f32mm  < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f8f16mm,+f8f32mm  < %s \
+// RUN:        | llvm-objdump -d --mattr=+f8f16mm,+f8f32mm  - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f8f16mm,+f8f32mm  < %s \
+// RUN:        | llvm-objdump -d  --no-print-imm-hex --mattr=-f8f16mm,-f8f32mm - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f8f16mm,+f8f32mm  < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+f8f16mm,+f8f32mm  -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fmmla v0.8h, v1.16b, v2.16b
+// CHECK-INST: fmmla v0.8h, v1.16b, v2.16b
+// CHECK-ENCODING: [0x20,0xec,0x02,0x6e]
+// CHECK-ERROR: instruction requires: f8f16mm
+// CHECK-UNKNOWN: 6e02ec20 <unknown>
+
+fmmla v0.4s, v1.16b, v2.16b
+// CHECK-INST: fmmla v0.4s, v1.16b, v2.16b
+// CHECK-ENCODING: [0x20,0xec,0x82,0x6e]
+// CHECK-ERROR: instruction requires: f8f32mm
+// CHECK-UNKNOWN: 6e82ec20 <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SME2/bfmul-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfmul-diagnostics.s
new file mode 100644
index 000000000000000..c28cc5cd426dda5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/bfmul-diagnostics.s
@@ -0,0 +1,111 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2,+sve-bfscale 2>&1 < %s| FileCheck %s
+
+// Multiple and single, 2 regs
+
+bfmul   {z0.s-z1.s}, {z0.h-z1.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z1.h-z2.h}, {z0.h-z1.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+bfmul   {z0.h-z2.h}, {z0.h-z1.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z0.h-z1.h}, {z0.s-z1.s}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z0.h-z1.h}, {z1.h-z2.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+bfmul   {z0.h-z1.h}, {z0.h-z2.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z0.h-z1.h}, {z0.h-z1.h}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h
+
+bfmul   {z0.h-z1.h}, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h
+
+// Multiple and single, 4 regs
+
+bfmul   {z0.s-z3.s}, {z0.h-z3.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z1.h-z4.h}, {z0.h-z3.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types
+
+bfmul   {z0.h-z4.h}, {z0.h-z3.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+
+bfmul   {z0.h-z3.h}, {z0.s-z3.s}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z0.h-z3.h}, {z1.h-z4.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types
+
+bfmul   {z0.h-z3.h}, {z0.h-z4.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+
+bfmul   {z0.h-z3.h}, {z0.h-z3.h}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h
+
+bfmul   {z0.h-z3.h}, {z0.h-z3.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h
+
+// Multiple, 2 regs
+
+bfmul   {z0.s-z1.s}, {z0.h-z1.h}, {z0.h-z1.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z1.h-z2.h}, {z0.h-z1.h}, {z0.h-z1.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+bfmul   {z0.h-z2.h}, {z0.h-z1.h}, {z0.h-z1.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z0.h-z1.h}, {z0.s-z1.s}, {z0.h-z1.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z0.h-z1.h}, {z1.h-z2.h}, {z0.h-z1.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+bfmul   {z0.h-z1.h}, {z0.h-z2.h}, {z0.h-z1.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z0.h-z1.h}, {z0.h-z1.h}, {z0.s-z1.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z0.h-z1.h}, {z0.h-z1.h}, {z1.h-z2.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+bfmul   {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z2.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+// Multiple, 4 regs
+
+bfmul   {z0.s-z3.s}, {z0.h-z3.h}, {z0.h-z3.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z1.h-z4.h}, {z0.h-z3.h}, {z0.h-z3.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types
+
+bfmul   {z0.h-z4.h}, {z0.h-z3.h}, {z0.h-z3.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+
+bfmul   {z0.h-z3.h}, {z0.s-z3.s}, {z0.h-z3.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z0.h-z3.h}, {z1.h-z4.h}, {z0.h-z3.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types
+
+bfmul   {z0.h-z3.h}, {z0.h-z4.h}, {z0.h-z3.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+
+bfmul   {z0.h-z3.h}, {z0.h-z3.h}, {z0.s-z3.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmul   {z0.h-z3.h}, {z0.h-z3.h}, {z1.h-z4.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types
+
+bfmul   {z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z4.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
diff --git a/llvm/test/MC/AArch64/SME2/bfmul.s b/llvm/test/MC/AArch64/SME2/bfmul.s
new file mode 100644
index 000000000000000..10a43848c738199
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/bfmul.s
@@ -0,0 +1,92 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sve-bfscale < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sve-bfscale < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+sve-bfscale - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sve-bfscale < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sve-bfscale < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+sve-bfscale -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// Multiple and single, 2 regs
+
+bfmul   {z0.h-z1.h}, {z0.h-z1.h}, z0.h  // 11000001-00100000-11101000-00000000
+// CHECK-INST: bfmul   { z0.h, z1.h }, { z0.h, z1.h }, z0.h
+// CHECK-ENCODING: [0x00,0xe8,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c120e800 <unknown>
+
+bfmul   {z20.h-z21.h}, {z10.h-z11.h}, z10.h  // 11000001-00110100-11101001-01010100
+// CHECK-INST: bfmul   { z20.h, z21.h }, { z10.h, z11.h }, z10.h
+// CHECK-ENCODING: [0x54,0xe9,0x34,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c134e954 <unknown>
+
+bfmul   {z30.h-z31.h}, {z30.h-z31.h}, z15.h  // 11000001-00111110-11101011-11011110
+// CHECK-INST: bfmul   { z30.h, z31.h }, { z30.h, z31.h }, z15.h
+// CHECK-ENCODING: [0xde,0xeb,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c13eebde <unknown>
+
+// Multiple and single, 4 regs
+
+bfmul   {z0.h-z3.h}, {z0.h-z3.h}, z0.h  // 11000001-00100001-11101000-00000000
+// CHECK-INST: bfmul   { z0.h - z3.h }, { z0.h - z3.h }, z0.h
+// CHECK-ENCODING: [0x00,0xe8,0x21,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c121e800 <unknown>
+
+bfmul   {z20.h-z23.h}, {z8.h-z11.h}, z10.h  // 11000001-00110101-11101001-00010100
+// CHECK-INST: bfmul   { z20.h - z23.h }, { z8.h - z11.h }, z10.h
+// CHECK-ENCODING: [0x14,0xe9,0x35,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c135e914 <unknown>
+
+bfmul   {z28.h-z31.h}, {z28.h-z31.h}, z15.h  // 11000001-00111111-11101011-10011100
+// CHECK-INST: bfmul   { z28.h - z31.h }, { z28.h - z31.h }, z15.h
+// CHECK-ENCODING: [0x9c,0xeb,0x3f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c13feb9c <unknown>
+
+// Multiple, 2 regs
+bfmul   {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h}  // 11000001-00100000-11100100-00000000
+// CHECK-INST: bfmul   { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h }
+// CHECK-ENCODING: [0x00,0xe4,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c120e400 <unknown>
+
+bfmul   {z20.h-z21.h}, {z10.h-z11.h}, {z20.h-z21.h}  // 11000001-00110100-11100101-01010100
+// CHECK-INST: bfmul   { z20.h, z21.h }, { z10.h, z11.h }, { z20.h, z21.h }
+// CHECK-ENCODING: [0x54,0xe5,0x34,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c134e554 <unknown>
+
+bfmul   {z30.h-z31.h}, {z30.h-z31.h}, {z30.h-z31.h}  // 11000001-00111110-11100111-11011110
+// CHECK-INST: bfmul   { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h }
+// CHECK-ENCODING: [0xde,0xe7,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c13ee7de <unknown>
+
+// Multiple, 4 regs
+
+bfmul   {z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z3.h}  // 11000001-00100001-11100100-00000000
+// CHECK-INST: bfmul   { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h }
+// CHECK-ENCODING: [0x00,0xe4,0x21,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c121e400 <unknown>
+
+bfmul   {z20.h-z23.h}, {z8.h-z11.h}, {z20.h-z23.h}  // 11000001-00110101-11100101-00010100
+// CHECK-INST: bfmul   { z20.h - z23.h }, { z8.h - z11.h }, { z20.h - z23.h }
+// CHECK-ENCODING: [0x14,0xe5,0x35,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c135e514 <unknown>
+
+bfmul   {z28.h-z31.h}, {z28.h-z31.h}, {z28.h-z31.h}  // 11000001-00111101-11100111-10011100
+// CHECK-INST: bfmul   { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h }
+// CHECK-ENCODING: [0x9c,0xe7,0x3d,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c13de79c <unknown>
diff --git a/llvm/test/MC/AArch64/SME2/bfscale-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfscale-diagnostics.s
new file mode 100644
index 000000000000000..63367eed65b6c92
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/bfscale-diagnostics.s
@@ -0,0 +1,87 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sve-bfscale 2>&1 < %s| FileCheck %s
+
+// Multiple and single vector, 2 regs
+
+bfscale  {z0.s-z1.s}, {z0.s-z1.s}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfscale  {z1.h-z2.h}, {z1.h-z2.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+bfscale  {z0.h-z2.h}, {z0.h-z2.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfscale  {z0.h-z1.h}, {z0.h-z1.h}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h
+
+bfscale  {z0.h-z1.h}, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h
+
+bfscale  {z0.h-z1.h}, {z2.h-z3.h}, z8.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register list
+
+// Multiple and single vector, 4 regs
+
+bfscale  {z0.s-z3.s}, {z0.s-z3.s}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfscale  {z1.h-z4.h}, {z1.h-z4.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types
+
+bfscale  {z0.h-z4.h}, {z0.h-z4.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+
+bfscale  {z0.h-z3.h}, {z0.h-z3.h}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h
+
+bfscale  {z0.h-z3.h}, {z0.h-z3.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h
+
+bfscale  {z0.h-z3.h}, {z4.h-z7.h}, z8.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register list
+
+// Multiple vectors, 2 regs
+
+bfscale  {z0.s-z1.s}, {z0.s-z1.s}, {z2.h-z3.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfscale  {z1.h-z2.h}, {z1.h-z2.h}, {z2.h-z3.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+bfscale  {z0.h-z2.h}, {z0.h-z4.h}, {z2.h-z3.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+
+bfscale  {z0.h-z1.h}, {z0.h-z1.h}, {z2.s-z3.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfscale  {z0.h-z1.h}, {z0.h-z1.h}, {z28.h-z30.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfscale  {z0.h-z1.h}, {z0.h-z1.h}, {z29.h-z30.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+bfscale  {z0.h-z1.h}, {z2.h-z3.h}, {z28.h-z29.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register list
+
+// Multiple vectors, 4 regs
+
+bfscale  {z0.s-z3.s}, {z0.s-z3.s}, {z4.h-z7.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfscale  {z1.h-z4.h}, {z1.h-z4.h}, {z4.h-z7.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types
+
+bfscale  {z0.h-z4.h}, {z0.h-z4.h}, {z4.h-z7.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+
+bfscale  {z0.h-z3.h}, {z0.h-z3.h}, {z4.s-z7.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfscale  {z0.h-z3.h}, {z0.h-z3.h}, {z4.h-z8.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+
+bfscale  {z0.h-z3.h}, {z0.h-z3.h}, {z5.h-z8.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types
+
+bfscale  {z0.h-z3.h}, {z4.h-z7.h}, {z8.h-z11.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must match destination register list
diff --git a/llvm/test/MC/AArch64/SME2/bfscale.s b/llvm/test/MC/AArch64/SME2/bfscale.s
new file mode 100644
index 000000000000000..719d013b235c053
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2/bfscale.s
@@ -0,0 +1,93 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sve-bfscale < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sve-bfscale < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2,+sve-bfscale - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sve-bfscale < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sve-bfscale < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+sve-bfscale -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// Multiple and single vector, 2 regs
+
+bfscale {z0.h-z1.h}, {z0.h-z1.h}, z0.h  // 11000001-00100000-10100001-10000000
+// CHECK-INST: bfscale { z0.h, z1.h }, { z0.h, z1.h }, z0.h
+// CHECK-ENCODING: [0x80,0xa1,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c120a180 <unknown>
+
+bfscale {z20.h-z21.h}, {z20.h-z21.h}, z5.h  // 11000001-00100101-10100001-10010100
+// CHECK-INST: bfscale { z20.h, z21.h }, { z20.h, z21.h }, z5.h
+// CHECK-ENCODING: [0x94,0xa1,0x25,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c125a194 <unknown>
+
+bfscale {z30.h-z31.h}, {z30.h-z31.h}, z15.h  // 11000001-00101111-10100001-10011110
+// CHECK-INST: bfscale { z30.h, z31.h }, { z30.h, z31.h }, z15.h
+// CHECK-ENCODING: [0x9e,0xa1,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c12fa19e <unknown>
+
+// Multiple and single vector, 4 regs
+
+bfscale {z0.h-z3.h}, {z0.h-z3.h}, z0.h  // 11000001-00100000-10101001-10000000
+// CHECK-INST: bfscale { z0.h - z3.h }, { z0.h - z3.h }, z0.h
+// CHECK-ENCODING: [0x80,0xa9,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c120a980 <unknown>
+
+bfscale {z20.h-z23.h}, {z20.h-z23.h}, z5.h  // 11000001-00100101-10101001-10010100
+// CHECK-INST: bfscale { z20.h - z23.h }, { z20.h - z23.h }, z5.h
+// CHECK-ENCODING: [0x94,0xa9,0x25,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c125a994 <unknown>
+
+bfscale {z28.h-z31.h}, {z28.h-z31.h}, z15.h  // 11000001-00101111-10101001-10011100
+// CHECK-INST: bfscale { z28.h - z31.h }, { z28.h - z31.h }, z15.h
+// CHECK-ENCODING: [0x9c,0xa9,0x2f,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c12fa99c <unknown>
+
+// Multiple vectors, 2 regs
+
+bfscale {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h}  // 11000001-00100000-10110001-10000000
+// CHECK-INST: bfscale { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h }
+// CHECK-ENCODING: [0x80,0xb1,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c120b180 <unknown>
+
+bfscale {z20.h-z21.h}, {z20.h-z21.h}, {z20.h-z21.h}  // 11000001-00110100-10110001-10010100
+// CHECK-INST: bfscale { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h }
+// CHECK-ENCODING: [0x94,0xb1,0x34,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c134b194 <unknown>
+
+bfscale {z30.h-z31.h}, {z30.h-z31.h}, {z30.h-z31.h}  // 11000001-00111110-10110001-10011110
+// CHECK-INST: bfscale { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h }
+// CHECK-ENCODING: [0x9e,0xb1,0x3e,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c13eb19e <unknown>
+
+// Multiple vectors, 4 regs
+
+bfscale {z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z3.h}  // 11000001-00100000-10111001-10000000
+// CHECK-INST: bfscale { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h }
+// CHECK-ENCODING: [0x80,0xb9,0x20,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c120b980 <unknown>
+
+bfscale {z20.h-z23.h}, {z20.h-z23.h}, {z20.h-z23.h}  // 11000001-00110100-10111001-10010100
+// CHECK-INST: bfscale { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h }
+// CHECK-ENCODING: [0x94,0xb9,0x34,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c134b994 <unknown>
+
+bfscale {z28.h-z31.h}, {z28.h-z31.h}, {z28.h-z31.h}  // 11000001-00111100-10111001-10011100
+// CHECK-INST: bfscale { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h }
+// CHECK-ENCODING: [0x9c,0xb9,0x3c,0xc1]
+// CHECK-ERROR: instruction requires: sme2 sve-bfscale
+// CHECK-UNKNOWN: c13cb99c <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s
new file mode 100644
index 000000000000000..5906bcb07f15d5a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening-diagnostics.s
@@ -0,0 +1,220 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2 < %s 2>&1 | FileCheck %s
+
+// BFMOP4A
+
+// Single vectors
+
+bfmop4a za0.d, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4a za4.s, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, z0.s, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4a za0.s, z15.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4a za0.s, z16.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4a za0.s, z0.h, z16.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4a za0.s, z12.h, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4a za0.s, z12.h, z14.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4a za0.s, z12.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Single and multiple vectors
+
+bfmop4a za0.d, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4a za4.s, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, z0.s, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4a za0.s, z1.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4a za0.s, z16.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4a za0.s, z0.h, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, z0.h, {z17.h-z18.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4a za0.s, z0.h, {z12.h-z13.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+// Multiple and single vectors
+
+bfmop4a za0.d, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4a za4.s, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, {z0.s-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+
+bfmop4a za0.s, {z1.h-z2.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4a za0.s, {z16.h-z17.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4a za0.s, {z0.h-z1.h}, z16.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4a za0.s, {z0.h-z1.h}, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4a za0.s, {z0.h-z1.h}, z12.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Multiple vectors
+
+bfmop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4a za4.s, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, {z0.s-z1.s}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, {z1.h-z2.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4a za0.s, {z18.h-z19.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4a za0.s, {z0.h-z1.h}, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4a za0.s, {z0.h-z1.h}, {z19.h-z20.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4a za0.s, {z0.h-z1.h}, {z10.h-z11.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+
+// BFMOP4S
+
+// Single vectors
+
+bfmop4s za0.d, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4s za4.s, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, z0.s, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4s za0.s, z15.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4s za0.s, z16.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4s za0.s, z0.h, z16.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4s za0.s, z12.h, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4s za0.s, z12.h, z14.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4s za0.s, z12.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Single and multiple vectors
+
+bfmop4s za0.d, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4s za4.s, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, z0.s, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4s za0.s, z1.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4s za0.s, z16.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+bfmop4s za0.s, z0.h, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, z0.h, {z17.h-z18.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4s za0.s, z0.h, {z12.h-z13.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+// Multiple and single vectors
+
+bfmop4s za0.d, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4s za4.s, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, {z0.s-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+
+bfmop4s za0.s, {z1.h-z2.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4s za0.s, {z16.h-z17.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4s za0.s, {z0.h-z1.h}, z16.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4s za0.s, {z0.h-z1.h}, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+bfmop4s za0.s, {z0.h-z1.h}, z12.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Multiple vectors
+
+bfmop4s za0.d, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s
+
+bfmop4s za4.s, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, {z0.s-z1.s}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, {z1.h-z2.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4s za0.s, {z18.h-z19.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4s za0.s, {z0.h-z1.h}, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+bfmop4s za0.s, {z0.h-z1.h}, {z19.h-z20.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+bfmop4s za0.s, {z0.h-z1.h}, {z10.h-z11.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s
new file mode 100644
index 000000000000000..40d08e503c8bb32
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-widening.s
@@ -0,0 +1,178 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// BFMOP4A
+
+// Single vectors
+
+bfmop4a za0.s, z0.h, z16.h  // 10000001-00000000-00000000-00000000
+// CHECK-INST: bfmop4a za0.s, z0.h, z16.h
+// CHECK-ENCODING: [0x00,0x00,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81000000 <unknown>
+
+bfmop4a za3.s, z14.h, z30.h  // 10000001-00001110-00000001-11000011
+// CHECK-INST: bfmop4a za3.s, z14.h, z30.h
+// CHECK-ENCODING: [0xc3,0x01,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 810e01c3 <unknown>
+
+bfmop4a za1.s, z10.h, z20.h  // 10000001-00000100-00000001-01000001
+// CHECK-INST: bfmop4a za1.s, z10.h, z20.h
+// CHECK-ENCODING: [0x41,0x01,0x04,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81040141 <unknown>
+
+// Single and multiple vectors
+
+bfmop4a za0.s, z0.h, {z16.h-z17.h}  // 10000001-00010000-00000000-00000000
+// CHECK-INST: bfmop4a za0.s, z0.h, { z16.h, z17.h }
+// CHECK-ENCODING: [0x00,0x00,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81100000 <unknown>
+
+bfmop4a za3.s, z14.h, {z30.h-z31.h}  // 10000001-00011110-00000001-11000011
+// CHECK-INST: bfmop4a za3.s, z14.h, { z30.h, z31.h }
+// CHECK-ENCODING: [0xc3,0x01,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 811e01c3 <unknown>
+
+bfmop4a za2.s, z12.h, {z24.h-z25.h}  // 10000001-00011000-00000001-10000010
+// CHECK-INST: bfmop4a za2.s, z12.h, { z24.h, z25.h }
+// CHECK-ENCODING: [0x82,0x01,0x18,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81180182 <unknown>
+
+// Multiple and single vectors
+
+bfmop4a za0.s, {z0.h-z1.h}, z16.h  // 10000001-00000000-00000010-00000000
+// CHECK-INST: bfmop4a za0.s, { z0.h, z1.h }, z16.h
+// CHECK-ENCODING: [0x00,0x02,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81000200 <unknown>
+
+bfmop4a za3.s, {z14.h-z15.h}, z30.h  // 10000001-00001110-00000011-11000011
+// CHECK-INST: bfmop4a za3.s, { z14.h, z15.h }, z30.h
+// CHECK-ENCODING: [0xc3,0x03,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 810e03c3 <unknown>
+
+bfmop4a za2.s, {z12.h-z13.h}, z28.h  // 10000001-00001100-00000011-10000010
+// CHECK-INST: bfmop4a za2.s, { z12.h, z13.h }, z28.h
+// CHECK-ENCODING: [0x82,0x03,0x0c,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 810c0382 <unknown>
+
+// Multiple vectors
+
+bfmop4a za0.s, {z0.h-z1.h}, {z16.h-z17.h}  // 10000001-00010000-00000010-00000000
+// CHECK-INST: bfmop4a za0.s, { z0.h, z1.h }, { z16.h, z17.h }
+// CHECK-ENCODING: [0x00,0x02,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81100200 <unknown>
+
+bfmop4a za3.s, {z14.h-z15.h}, {z30.h-z31.h}  // 10000001-00011110-00000011-11000011
+// CHECK-INST: bfmop4a za3.s, { z14.h, z15.h }, { z30.h, z31.h }
+// CHECK-ENCODING: [0xc3,0x03,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 811e03c3 <unknown>
+
+bfmop4a za2.s, {z12.h-z13.h}, {z26.h-z27.h}  // 10000001-00011010-00000011-10000010
+// CHECK-INST: bfmop4a za2.s, { z12.h, z13.h }, { z26.h, z27.h }
+// CHECK-ENCODING: [0x82,0x03,0x1a,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 811a0382 <unknown>
+
+
+// BFMOP4S
+
+// Single vectors
+
+bfmop4s za0.s, z0.h, z16.h  // 10000001-00000000-00000000-00010000
+// CHECK-INST: bfmop4s za0.s, z0.h, z16.h
+// CHECK-ENCODING: [0x10,0x00,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81000010 <unknown>
+
+bfmop4s za3.s, z14.h, z30.h  // 10000001-00001110-00000001-11010011
+// CHECK-INST: bfmop4s za3.s, z14.h, z30.h
+// CHECK-ENCODING: [0xd3,0x01,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 810e01d3 <unknown>
+
+bfmop4s za1.s, z10.h, z20.h  // 10000001-00000100-00000001-01010001
+// CHECK-INST: bfmop4s za1.s, z10.h, z20.h
+// CHECK-ENCODING: [0x51,0x01,0x04,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81040151 <unknown>
+
+// Single and multiple vectors
+
+bfmop4s za0.s, z0.h, {z16.h-z17.h}  // 10000001-00010000-00000000-00010000
+// CHECK-INST: bfmop4s za0.s, z0.h, { z16.h, z17.h }
+// CHECK-ENCODING: [0x10,0x00,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81100010 <unknown>
+
+bfmop4s za3.s, z14.h, {z30.h-z31.h}  // 10000001-00011110-00000001-11010011
+// CHECK-INST: bfmop4s za3.s, z14.h, { z30.h, z31.h }
+// CHECK-ENCODING: [0xd3,0x01,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 811e01d3 <unknown>
+
+bfmop4s za2.s, z12.h, {z24.h-z25.h}  // 10000001-00011000-00000001-10010010
+// CHECK-INST: bfmop4s za2.s, z12.h, { z24.h, z25.h }
+// CHECK-ENCODING: [0x92,0x01,0x18,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81180192 <unknown>
+
+// Multiple and single vectors
+
+bfmop4s za0.s, {z0.h-z1.h}, z16.h  // 10000001-00000000-00000010-00010000
+// CHECK-INST: bfmop4s za0.s, { z0.h, z1.h }, z16.h
+// CHECK-ENCODING: [0x10,0x02,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81000210 <unknown>
+
+bfmop4s za3.s, {z14.h-z15.h}, z30.h  // 10000001-00001110-00000011-11010011
+// CHECK-INST: bfmop4s za3.s, { z14.h, z15.h }, z30.h
+// CHECK-ENCODING: [0xd3,0x03,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 810e03d3 <unknown>
+
+bfmop4s za2.s, {z12.h-z13.h}, z28.h  // 10000001-00001100-00000011-10010010
+// CHECK-INST: bfmop4s za2.s, { z12.h, z13.h }, z28.h
+// CHECK-ENCODING: [0x92,0x03,0x0c,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 810c0392 <unknown>
+
+// Multiple vectors
+
+bfmop4s za0.s, {z0.h-z1.h}, {z16.h-z17.h}  // 10000001-00010000-00000010-00010000
+// CHECK-INST: bfmop4s za0.s, { z0.h, z1.h }, { z16.h, z17.h }
+// CHECK-ENCODING: [0x10,0x02,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 81100210 <unknown>
+
+bfmop4s za3.s, {z14.h-z15.h}, {z30.h-z31.h}  // 10000001-00011110-00000011-11010011
+// CHECK-INST: bfmop4s za3.s, { z14.h, z15.h }, { z30.h, z31.h }
+// CHECK-ENCODING: [0xd3,0x03,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 811e03d3 <unknown>
+
+bfmop4s za2.s, {z12.h-z13.h}, {z26.h-z27.h}  // 10000001-00011010-00000011-10010010
+// CHECK-INST: bfmop4s za2.s, { z12.h, z13.h }, { z26.h, z27.h }
+// CHECK-ENCODING: [0x92,0x03,0x1a,0x81]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: 811a0392 <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening-diagnostics.s
new file mode 100644
index 000000000000000..9a06192c0b30af4
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening-diagnostics.s
@@ -0,0 +1,120 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f8f32 < %s 2>&1 | FileCheck %s
+
+// Single vectors
+
+fmop4a za0.d, z0.b, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za4.s, z0.b, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, z0.d, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b
+
+fmop4a za0.s, z15.b, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b
+
+fmop4a za0.s, z16.b, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b
+
+fmop4a za0.s, z0.b, z16.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+fmop4a za0.s, z12.b, z17.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+fmop4a za0.s, z12.b, z14.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+fmop4a za0.s, z12.b, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+// Single and multiple vectors
+
+fmop4a za0.d, z0.b, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za4.s, z0.b, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, z0.d, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b
+
+fmop4a za0.s, z1.b, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b
+
+fmop4a za0.s, z16.b, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.b..z14.b
+
+fmop4a za0.s, z0.b, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, z0.b, {z17.b-z18.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.s, z0.b, {z16.b-z18.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, z0.b, {z12.b-z13.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+// Multiple and single vectors
+
+fmop4a za0.d, {z0.b-z1.b}, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za4.s, {z0.b-z1.b}, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z0.s-z1.b}, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+
+fmop4a za0.s, {z1.b-z2.b}, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.s, {z0.b-z2.b}, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z16.b-z17.b}, z16.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.s, {z0.b-z1.b}, z16.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+fmop4a za0.s, {z0.b-z1.b}, z17.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+fmop4a za0.s, {z0.b-z1.b}, z12.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.b..z30.b
+
+// Multiple vectors
+
+fmop4a za0.d, {z0.b-z1.b}, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za4.s, {z0.b-z1.b}, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z0.s-z1.s}, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z1.b-z2.b}, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.s, {z0.b-z2.b}, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z18.b-z19.b}, {z16.b-z17.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.s, {z0.b-z1.b}, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z0.b-z1.b}, {z19.b-z20.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.s, {z0.b-z1.b}, {z18.b-z20.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.s, {z0.b-z1.b}, {z10.b-z11.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening.s b/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening.s
new file mode 100644
index 000000000000000..9e378bcf3d75335
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/fmop4a-fp8-fp32-widening.s
@@ -0,0 +1,93 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f8f32 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f8f32 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2p2,+sme-f8f32 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f8f32 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f8f32 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f8f32 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// Single vectors
+
+fmop4a  za0.s, z0.b, z16.b  // 10000000-00100000-00000000-00000000
+// CHECK-INST: fmop4a  za0.s, z0.b, z16.b
+// CHECK-ENCODING: [0x00,0x00,0x20,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80200000 <unknown>
+
+fmop4a  za1.s, z10.b, z20.b  // 10000000-00100100-00000001-01000001
+// CHECK-INST: fmop4a  za1.s, z10.b, z20.b
+// CHECK-ENCODING: [0x41,0x01,0x24,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80240141 <unknown>
+
+fmop4a  za3.s, z14.b, z30.b  // 10000000-00101110-00000001-11000011
+// CHECK-INST: fmop4a  za3.s, z14.b, z30.b
+// CHECK-ENCODING: [0xc3,0x01,0x2e,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 802e01c3 <unknown>
+
+// Single and multiple vectors
+
+fmop4a  za0.s, z0.b, {z16.b-z17.b}  // 10000000-00110000-00000000-00000000
+// CHECK-INST: fmop4a  za0.s, z0.b, { z16.b, z17.b }
+// CHECK-ENCODING: [0x00,0x00,0x30,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80300000 <unknown>
+
+fmop4a  za1.s, z10.b, {z20.b-z21.b}  // 10000000-00110100-00000001-01000001
+// CHECK-INST: fmop4a  za1.s, z10.b, { z20.b, z21.b }
+// CHECK-ENCODING: [0x41,0x01,0x34,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80340141 <unknown>
+
+fmop4a  za3.s, z14.b, {z30.b-z31.b}  // 10000000-00111110-00000001-11000011
+// CHECK-INST: fmop4a  za3.s, z14.b, { z30.b, z31.b }
+// CHECK-ENCODING: [0xc3,0x01,0x3e,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 803e01c3 <unknown>
+
+// Multiple and single vectors
+
+fmop4a  za0.s, {z0.b-z1.b}, z16.b  // 10000000-00100000-00000010-00000000
+// CHECK-INST: fmop4a  za0.s, { z0.b, z1.b }, z16.b
+// CHECK-ENCODING: [0x00,0x02,0x20,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80200200 <unknown>
+
+fmop4a  za1.s, {z10.b-z11.b}, z20.b  // 10000000-00100100-00000011-01000001
+// CHECK-INST: fmop4a  za1.s, { z10.b, z11.b }, z20.b
+// CHECK-ENCODING: [0x41,0x03,0x24,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80240341 <unknown>
+
+fmop4a  za3.s, {z14.b-z15.b}, z30.b  // 10000000-00101110-00000011-11000011
+// CHECK-INST: fmop4a  za3.s, { z14.b, z15.b }, z30.b
+// CHECK-ENCODING: [0xc3,0x03,0x2e,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 802e03c3 <unknown>
+
+// Multiple vectors
+
+fmop4a  za0.s, {z0.b-z1.b}, {z16.b-z17.b}  // 10000000-00110000-00000010-00000000
+// CHECK-INST: fmop4a  za0.s, { z0.b, z1.b }, { z16.b, z17.b }
+// CHECK-ENCODING: [0x00,0x02,0x30,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80300200 <unknown>
+
+fmop4a  za1.s, {z10.b-z11.b}, {z20.b-z21.b}  // 10000000-00110100-00000011-01000001
+// CHECK-INST: fmop4a  za1.s, { z10.b, z11.b }, { z20.b, z21.b }
+// CHECK-ENCODING: [0x41,0x03,0x34,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 80340341 <unknown>
+
+fmop4a  za3.s, {z14.b-z15.b}, {z30.b-z31.b}  // 10000000-00111110-00000011-11000011
+// CHECK-INST: fmop4a  za3.s, { z14.b, z15.b }, { z30.b, z31.b }
+// CHECK-ENCODING: [0xc3,0x03,0x3e,0x80]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f8f32
+// CHECK-UNKNOWN: 803e03c3 <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening-diagnostics.s
new file mode 100644
index 000000000000000..0272721e083621d
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening-diagnostics.s
@@ -0,0 +1,220 @@
+// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f16f16 < %s 2>&1 | FileCheck %s
+
+// FMOP4A
+
+// Single vectors
+
+fmop4a za0.d, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za2.h, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, z0.s, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4a za0.h, z15.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4a za0.h, z16.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4a za0.h, z0.h, z16.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4a za0.h, z12.h, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4a za0.h, z12.h, z14.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4a za0.h, z12.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Single and multiple vectors
+
+fmop4a za0.d, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za2.h, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, z0.s, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4a za0.h, z1.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4a za0.h, z16.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4a za0.h, z0.h, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, z0.h, {z17.h-z18.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.h, z0.h, {z12.h-z13.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+// Multiple and single vectors
+
+fmop4a za0.d, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za2.h, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, {z0.s-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+
+fmop4a za0.h, {z1.h-z2.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.h, {z16.h-z17.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.h, {z0.h-z1.h}, z16.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4a za0.h, {z0.h-z1.h}, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4a za0.h, {z0.h-z1.h}, z12.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Multiple vectors
+
+fmop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4a za2.h, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, {z0.s-z1.s}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, {z1.h-z2.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.h, {z18.h-z19.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.h, {z0.h-z1.h}, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4a za0.h, {z0.h-z1.h}, {z19.h-z20.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+fmop4a za0.h, {z0.h-z1.h}, {z10.h-z11.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+
+// FMOP4S
+
+// Single vectors
+
+fmop4s za0.d, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4s za2.h, z0.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, z0.s, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4s za0.h, z15.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4s za0.h, z16.h, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4s za0.h, z0.h, z16.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4s za0.h, z12.h, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4s za0.h, z12.h, z14.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4s za0.h, z12.h, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Single and multiple vectors
+
+fmop4s za0.d, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4s za2.h, z0.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, z0.s, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4s za0.h, z1.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4s za0.h, z16.h, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h
+
+fmop4s za0.h, z0.h, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, z0.h, {z17.h-z18.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+fmop4s za0.h, z0.h, {z12.h-z13.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+// Multiple and single vectors
+
+fmop4s za0.d, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4s za2.h, {z0.h-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, {z0.s-z1.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix
+
+fmop4s za0.h, {z1.h-z2.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4s za0.h, {z16.h-z17.h}, z16.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4s za0.h, {z0.h-z1.h}, z16.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4s za0.h, {z0.h-z1.h}, z17.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+fmop4s za0.h, {z0.h-z1.h}, z12.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h
+
+// Multiple vectors
+
+fmop4s za0.d, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand
+
+fmop4s za2.h, {z0.h-z1.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, {z0.s-z1.s}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, {z1.h-z2.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4s za0.h, {z18.h-z19.h}, {z16.h-z17.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types
+
+fmop4s za0.h, {z0.h-z1.h}, {z16.s-z17.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmop4s za0.h, {z0.h-z1.h}, {z19.h-z20.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
+
+fmop4s za0.h, {z0.h-z1.h}, {z10.h-z11.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types
diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s
new file mode 100644
index 000000000000000..2a94acd35e95c30
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-non-widening.s
@@ -0,0 +1,179 @@
+
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f16f16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f16f16 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2p2,+sme-f16f16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f16f16 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f16f16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f16f16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+
+// FMOP4A
+
+// Single vectors
+
+fmop4a  za0.h, z0.h, z16.h  // 10000001-00000000-00000000-00001000
+// CHECK-INST: fmop4a  za0.h, z0.h, z16.h
+// CHECK-ENCODING: [0x08,0x00,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81000008 <unknown>
+
+fmop4a  za1.h, z12.h, z24.h  // 10000001-00001000-00000001-10001001
+// CHECK-INST: fmop4a  za1.h, z12.h, z24.h
+// CHECK-ENCODING: [0x89,0x01,0x08,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81080189 <unknown>
+
+fmop4a  za1.h, z14.h, z30.h  // 10000001-00001110-00000001-11001001
+// CHECK-INST: fmop4a  za1.h, z14.h, z30.h
+// CHECK-ENCODING: [0xc9,0x01,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 810e01c9 <unknown>
+
+// Single and multiple vectors
+
+fmop4a  za0.h, z0.h, {z16.h-z17.h}  // 10000001-00010000-00000000-00001000
+// CHECK-INST: fmop4a  za0.h, z0.h, { z16.h, z17.h }
+// CHECK-ENCODING: [0x08,0x00,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81100008 <unknown>
+
+fmop4a  za1.h, z12.h, {z24.h-z25.h}  // 10000001-00011000-00000001-10001001
+// CHECK-INST: fmop4a  za1.h, z12.h, { z24.h, z25.h }
+// CHECK-ENCODING: [0x89,0x01,0x18,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81180189 <unknown>
+
+fmop4a  za1.h, z14.h, {z30.h-z31.h}  // 10000001-00011110-00000001-11001001
+// CHECK-INST: fmop4a  za1.h, z14.h, { z30.h, z31.h }
+// CHECK-ENCODING: [0xc9,0x01,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 811e01c9 <unknown>
+
+// Multiple and single vectors
+
+fmop4a  za0.h, {z0.h-z1.h}, z16.h  // 10000001-00000000-00000010-00001000
+// CHECK-INST: fmop4a  za0.h, { z0.h, z1.h }, z16.h
+// CHECK-ENCODING: [0x08,0x02,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81000208 <unknown>
+
+fmop4a  za1.h, {z12.h-z13.h}, z24.h  // 10000001-00001000-00000011-10001001
+// CHECK-INST: fmop4a  za1.h, { z12.h, z13.h }, z24.h
+// CHECK-ENCODING: [0x89,0x03,0x08,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81080389 <unknown>
+
+fmop4a  za1.h, {z14.h-z15.h}, z30.h  // 10000001-00001110-00000011-11001001
+// CHECK-INST: fmop4a  za1.h, { z14.h, z15.h }, z30.h
+// CHECK-ENCODING: [0xc9,0x03,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 810e03c9 <unknown>
+
+// Multiple vectors
+
+fmop4a  za0.h, {z0.h-z1.h}, {z16.h-z17.h}  // 10000001-00010000-00000010-00001000
+// CHECK-INST: fmop4a  za0.h, { z0.h, z1.h }, { z16.h, z17.h }
+// CHECK-ENCODING: [0x08,0x02,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81100208 <unknown>
+
+fmop4a  za1.h, {z12.h-z13.h}, {z24.h-z25.h}  // 10000001-00011000-00000011-10001001
+// CHECK-INST: fmop4a  za1.h, { z12.h, z13.h }, { z24.h, z25.h }
+// CHECK-ENCODING: [0x89,0x03,0x18,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81180389 <unknown>
+
+fmop4a  za1.h, {z14.h-z15.h}, {z30.h-z31.h}  // 10000001-00011110-00000011-11001001
+// CHECK-INST: fmop4a  za1.h, { z14.h, z15.h }, { z30.h, z31.h }
+// CHECK-ENCODING: [0xc9,0x03,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 811e03c9 <unknown>
+
+// FMOP4S
+
+// Single vectors
+
+fmop4s  za0.h, z0.h, z16.h  // 10000001-00000000-00000000-00011000
+// CHECK-INST: fmop4s  za0.h, z0.h, z16.h
+// CHECK-ENCODING: [0x18,0x00,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81000018 <unknown>
+
+fmop4s  za1.h, z12.h, z24.h  // 10000001-00001000-00000001-10011001
+// CHECK-INST: fmop4s  za1.h, z12.h, z24.h
+// CHECK-ENCODING: [0x99,0x01,0x08,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81080199 <unknown>
+
+fmop4s  za1.h, z14.h, z30.h  // 10000001-00001110-00000001-11011001
+// CHECK-INST: fmop4s  za1.h, z14.h, z30.h
+// CHECK-ENCODING: [0xd9,0x01,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 810e01d9 <unknown>
+
+// Single and multiple vectors
+
+fmop4s  za0.h, z0.h, {z16.h-z17.h}  // 10000001-00010000-00000000-00011000
+// CHECK-INST: fmop4s  za0.h, z0.h, { z16.h, z17.h }
+// CHECK-ENCODING: [0x18,0x00,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81100018 <unknown>
+
+fmop4s  za1.h, z12.h, {z24.h-z25.h}  // 10000001-00011000-00000001-10011001
+// CHECK-INST: fmop4s  za1.h, z12.h, { z24.h, z25.h }
+// CHECK-ENCODING: [0x99,0x01,0x18,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81180199 <unknown>
+
+fmop4s  za1.h, z14.h, {z30.h-z31.h}  // 10000001-00011110-00000001-11011001
+// CHECK-INST: fmop4s  za1.h, z14.h, { z30.h, z31.h }
+// CHECK-ENCODING: [0xd9,0x01,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 811e01d9 <unknown>
+
+// Multiple and single vectors
+
+fmop4s  za0.h, {z0.h-z1.h}, z16.h  // 10000001-00000000-00000010-00011000
+// CHECK-INST: fmop4s  za0.h, { z0.h, z1.h }, z16.h
+// CHECK-ENCODING: [0x18,0x02,0x00,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81000218 <unknown>
+
+fmop4s  za1.h, {z12.h-z13.h}, z24.h  // 10000001-00001000-00000011-10011001
+// CHECK-INST: fmop4s  za1.h, { z12.h, z13.h }, z24.h
+// CHECK-ENCODING: [0x99,0x03,0x08,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81080399 <unknown>
+
+fmop4s  za1.h, {z14.h-z15.h}, z30.h  // 10000001-00001110-00000011-11011001
+// CHECK-INST: fmop4s  za1.h, { z14.h, z15.h }, z30.h
+// CHECK-ENCODING: [0xd9,0x03,0x0e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 810e03d9 <unknown>
+
+// Multiple vectors
+
+fmop4s  za0.h, {z0.h-z1.h}, {z16.h-z17.h}  // 10000001-00010000-00000010-00011000
+// CHECK-INST: fmop4s  za0.h, { z0.h, z1.h }, { z16.h, z17.h }
+// CHECK-ENCODING: [0x18,0x02,0x10,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81100218 <unknown>
+
+fmop4s  za1.h, {z12.h-z13.h}, {z24.h-z25.h}  // 10000001-00011000-00000011-10011001
+// CHECK-INST: fmop4s  za1.h, { z12.h, z13.h }, { z24.h, z25.h }
+// CHECK-ENCODING: [0x99,0x03,0x18,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 81180399 <unknown>
+
+fmop4s  za1.h, {z14.h-z15.h}, {z30.h-z31.h}  // 10000001-00011110-00000011-11011001
+// CHECK-INST: fmop4s  za1.h, { z14.h, z15.h }, { z30.h, z31.h }
+// CHECK-ENCODING: [0xd9,0x03,0x1e,0x81]
+// CHECK-ERROR: instruction requires: sme2p2 sme-f16f16
+// CHECK-UNKNOWN: 811e03d9 <unknown>
diff --git a/llvm/test/MC/AArch64/SME2p2/fmul-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmul-diagnostics.s
new file mode 100644
index 000000000000000..2fdd3f82adc1ddd
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/fmul-diagnostics.s
@@ -0,0 +1,112 @@
+
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 2>&1 < %s| FileCheck %s
+
+// Multiple and single, 2 regs
+
+fmul    {z0.b-z1.b}, {z0.h-z1.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z1.s-z2.s}, {z0.s-z1.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+fmul    {z0.d-z2.d}, {z0.d-z1.d}, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.h-z1.h}, {z0.b-z1.b}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.s-z1.s}, {z1.s-z2.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+fmul    {z0.d-z1.d}, {z0.d-z2.d}, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.h-z1.h}, {z0.h-z1.h}, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h
+
+fmul    {z0.s-z1.s}, {z0.s-z1.s}, z16.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.s..z15.s
+
+// Multiple and single, 4 regs
+
+fmul    {z0.b-z3.b}, {z0.h-z3.h}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z1.s-z3.s}, {z0.h-z3.h}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.d-z4.d}, {z0.d-z3.d}, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+
+fmul    {z0.h-z3.h}, {z0.b-z3.b}, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.s-z3.s}, {z1.s-z3.s}, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.d-z3.d}, {z0.d-z4.d}, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+
+fmul    {z0.h-z3.h}, {z0.h-z3.h}, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h
+
+fmul    {z0.s-z3.s}, {z0.s-z3.s}, z16.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.s..z15.s
+
+// Multiple, 2 regs
+
+fmul    {z0.b-z1.b}, {z0.h-z1.h}, {z0.h-z1.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z1.s-z2.s}, {z0.s-z1.s}, {z0.s-z1.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+fmul    {z0.d-z2.d}, {z0.d-z1.d}, {z0.d-z1.d}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.h-z1.h}, {z0.b-z1.b}, {z0.h-z1.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.s-z1.s}, {z1.s-z2.s}, {z0.s-z1.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+fmul    {z0.d-z1.d}, {z0.d-z2.d}, {z0.d-z1.d}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.h-z1.h}, {z0.h-z1.h}, {z0.b-z1.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.s-z1.s}, {z0.s-z1.s}, {z1.s-z2.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+
+fmul    {z0.d-z1.d}, {z0.d-z1.d}, {z0.d-z2.d}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+// Multiple, 4 regs
+
+fmul    {z0.b-z3.b}, {z0.h-z3.h}, {z0.h-z3.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z1.s-z3.s}, {z0.s-z3.s}, {z0.s-z3.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.d-z4.d}, {z0.d-z3.d}, {z0.d-z3.d}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+
+fmul    {z0.h-z3.h}, {z0.b-z3.b}, {z0.h-z3.h}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.s-z3.s}, {z1.s-z3.s}, {z0.s-z3.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.d-z3.d}, {z0.d-z4.d}, {z0.d-z3.d}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+
+fmul    {z0.h-z3.h}, {z0.h-z3.h}, {z0.b-z3.b}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.s-z3.s}, {z0.s-z3.s}, {z1.s-z3.s}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+fmul    {z0.d-z3.d}, {z0.d-z3.d}, {z0.d-z4.d}
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
diff --git a/llvm/test/MC/AArch64/SME2p2/fmul.s b/llvm/test/MC/AArch64/SME2p2/fmul.s
new file mode 100644
index 000000000000000..ec6f523867cef51
--- /dev/null
+++ b/llvm/test/MC/AArch64/SME2p2/fmul.s
@@ -0,0 +1,261 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// Multiple and single, 2 regs
+
+// 16-bit elements
+
+fmul    {z0.h-z1.h}, {z0.h-z1.h}, z0.h  // 11000001-01100000-11101000-00000000
+// CHECK-INST: fmul    { z0.h, z1.h }, { z0.h, z1.h }, z0.h
+// CHECK-ENCODING: [0x00,0xe8,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c160e800 <unknown>
+
+fmul    {z20.h-z21.h}, {z10.h-z11.h}, z10.h  // 11000001-01110100-11101001-01010100
+// CHECK-INST: fmul    { z20.h, z21.h }, { z10.h, z11.h }, z10.h
+// CHECK-ENCODING: [0x54,0xe9,0x74,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c174e954 <unknown>
+
+fmul    {z30.h-z31.h}, {z30.h-z31.h}, z15.h  // 11000001-01111110-11101011-11011110
+// CHECK-INST: fmul    { z30.h, z31.h }, { z30.h, z31.h }, z15.h
+// CHECK-ENCODING: [0xde,0xeb,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c17eebde <unknown>
+
+// 32-bit elements
+
+fmul    {z0.s-z1.s}, {z0.s-z1.s}, z0.s  // 11000001-10100000-11101000-00000000
+// CHECK-INST: fmul    { z0.s, z1.s }, { z0.s, z1.s }, z0.s
+// CHECK-ENCODING: [0x00,0xe8,0xa0,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1a0e800 <unknown>
+
+fmul    {z20.s-z21.s}, {z10.s-z11.s}, z10.s  // 11000001-10110100-11101001-01010100
+// CHECK-INST: fmul    { z20.s, z21.s }, { z10.s, z11.s }, z10.s
+// CHECK-ENCODING: [0x54,0xe9,0xb4,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1b4e954 <unknown>
+
+fmul    {z30.s-z31.s}, {z30.s-z31.s}, z15.s  // 11000001-10111110-11101011-11011110
+// CHECK-INST: fmul    { z30.s, z31.s }, { z30.s, z31.s }, z15.s
+// CHECK-ENCODING: [0xde,0xeb,0xbe,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1beebde <unknown>
+
+// 64-bit elements
+
+fmul    {z0.d-z1.d}, {z0.d-z1.d}, z0.d  // 11000001-11100000-11101000-00000000
+// CHECK-INST: fmul    { z0.d, z1.d }, { z0.d, z1.d }, z0.d
+// CHECK-ENCODING: [0x00,0xe8,0xe0,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1e0e800 <unknown>
+
+fmul    {z20.d-z21.d}, {z10.d-z11.d}, z10.d  // 11000001-11110100-11101001-01010100
+// CHECK-INST: fmul    { z20.d, z21.d }, { z10.d, z11.d }, z10.d
+// CHECK-ENCODING: [0x54,0xe9,0xf4,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1f4e954 <unknown>
+
+fmul    {z30.d-z31.d}, {z30.d-z31.d}, z15.d  // 11000001-11111110-11101011-11011110
+// CHECK-INST: fmul    { z30.d, z31.d }, { z30.d, z31.d }, z15.d
+// CHECK-ENCODING: [0xde,0xeb,0xfe,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1feebde <unknown>
+
+// Multiple and single, 4 regs
+
+// 16-bit elements
+
+fmul    {z0.h-z3.h}, {z0.h-z3.h}, z0.h  // 11000001-01100001-11101000-00000000
+// CHECK-INST: fmul    { z0.h - z3.h }, { z0.h - z3.h }, z0.h
+// CHECK-ENCODING: [0x00,0xe8,0x61,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c161e800 <unknown>
+
+fmul    {z20.h-z23.h}, {z8.h-z11.h}, z10.h  // 11000001-01110101-11101001-00010100
+// CHECK-INST: fmul    { z20.h - z23.h }, { z8.h - z11.h }, z10.h
+// CHECK-ENCODING: [0x14,0xe9,0x75,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c175e914 <unknown>
+
+fmul    {z28.h-z31.h}, {z28.h-z31.h}, z15.h  // 11000001-01111111-11101011-10011100
+// CHECK-INST: fmul    { z28.h - z31.h }, { z28.h - z31.h }, z15.h
+// CHECK-ENCODING: [0x9c,0xeb,0x7f,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c17feb9c <unknown>
+
+// 32-bit elements
+
+fmul    {z0.s-z3.s}, {z0.s-z3.s}, z0.s  // 11000001-10100001-11101000-00000000
+// CHECK-INST: fmul    { z0.s - z3.s }, { z0.s - z3.s }, z0.s
+// CHECK-ENCODING: [0x00,0xe8,0xa1,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1a1e800 <unknown>
+
+fmul    {z20.s-z23.s}, {z8.s-z11.s}, z10.s  // 11000001-10110101-11101001-00010100
+// CHECK-INST: fmul    { z20.s - z23.s }, { z8.s - z11.s }, z10.s
+// CHECK-ENCODING: [0x14,0xe9,0xb5,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1b5e914 <unknown>
+
+fmul    {z28.s-z31.s}, {z28.s-z31.s}, z15.s  // 11000001-10111111-11101011-10011100
+// CHECK-INST: fmul    { z28.s - z31.s }, { z28.s - z31.s }, z15.s
+// CHECK-ENCODING: [0x9c,0xeb,0xbf,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1bfeb9c <unknown>
+
+// 64-bit elements
+
+fmul    {z0.d-z3.d}, {z0.d-z3.d}, z0.d  // 11000001-11100001-11101000-00000000
+// CHECK-INST: fmul    { z0.d - z3.d }, { z0.d - z3.d }, z0.d
+// CHECK-ENCODING: [0x00,0xe8,0xe1,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1e1e800 <unknown>
+
+fmul    {z20.d-z23.d}, {z8.d-z11.d}, z10.d  // 11000001-11110101-11101001-00010100
+// CHECK-INST: fmul    { z20.d - z23.d }, { z8.d - z11.d }, z10.d
+// CHECK-ENCODING: [0x14,0xe9,0xf5,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1f5e914 <unknown>
+
+fmul    {z28.d-z31.d}, {z28.d-z31.d}, z15.d  // 11000001-11111111-11101011-10011100
+// CHECK-INST: fmul    { z28.d - z31.d }, { z28.d - z31.d }, z15.d
+// CHECK-ENCODING: [0x9c,0xeb,0xff,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1ffeb9c <unknown>
+
+// Multiple, 2 regs
+
+// 16-bit elements
+
+fmul    {z0.h-z1.h}, {z0.h-z1.h}, {z0.h-z1.h}  // 11000001-01100000-11100100-00000000
+// CHECK-INST: fmul    { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h }
+// CHECK-ENCODING: [0x00,0xe4,0x60,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c160e400 <unknown>
+
+fmul    {z20.h-z21.h}, {z10.h-z11.h}, {z20.h-z21.h}  // 11000001-01110100-11100101-01010100
+// CHECK-INST: fmul    { z20.h, z21.h }, { z10.h, z11.h }, { z20.h, z21.h }
+// CHECK-ENCODING: [0x54,0xe5,0x74,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c174e554 <unknown>
+
+fmul    {z30.h-z31.h}, {z30.h-z31.h}, {z30.h-z31.h}  // 11000001-01111110-11100111-11011110
+// CHECK-INST: fmul    { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h }
+// CHECK-ENCODING: [0xde,0xe7,0x7e,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c17ee7de <unknown>
+
+// 32-bit elements
+
+fmul    {z0.s-z1.s}, {z0.s-z1.s}, {z0.s-z1.s}  // 11000001-10100000-11100100-00000000
+// CHECK-INST: fmul    { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s }
+// CHECK-ENCODING: [0x00,0xe4,0xa0,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1a0e400 <unknown>
+
+fmul    {z20.s-z21.s}, {z10.s-z11.s}, {z20.s-z21.s}  // 11000001-10110100-11100101-01010100
+// CHECK-INST: fmul    { z20.s, z21.s }, { z10.s, z11.s }, { z20.s, z21.s }
+// CHECK-ENCODING: [0x54,0xe5,0xb4,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1b4e554 <unknown>
+
+fmul    {z30.s-z31.s}, {z30.s-z31.s}, {z30.s-z31.s}  // 11000001-10111110-11100111-11011110
+// CHECK-INST: fmul    { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s }
+// CHECK-ENCODING: [0xde,0xe7,0xbe,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1bee7de <unknown>
+
+// 64-bit elements
+
+fmul    {z0.d-z1.d}, {z0.d-z1.d}, {z0.d-z1.d}  // 11000001-11100000-11100100-00000000
+// CHECK-INST: fmul    { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d }
+// CHECK-ENCODING: [0x00,0xe4,0xe0,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1e0e400 <unknown>
+
+fmul    {z20.d-z21.d}, {z10.d-z11.d}, {z20.d-z21.d}  // 11000001-11110100-11100101-01010100
+// CHECK-INST: fmul    { z20.d, z21.d }, { z10.d, z11.d }, { z20.d, z21.d }
+// CHECK-ENCODING: [0x54,0xe5,0xf4,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1f4e554 <unknown>
+
+fmul    {z30.d-z31.d}, {z30.d-z31.d}, {z30.d-z31.d}  // 11000001-11111110-11100111-11011110
+// CHECK-INST: fmul    { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d }
+// CHECK-ENCODING: [0xde,0xe7,0xfe,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1fee7de <unknown>
+
+// Multiple, 4 regs
+
+// 16-bit elements
+
+fmul    {z0.h-z3.h}, {z0.h-z3.h}, {z0.h-z3.h}  // 11000001-01100001-11100100-00000000
+// CHECK-INST: fmul    { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h }
+// CHECK-ENCODING: [0x00,0xe4,0x61,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c161e400 <unknown>
+
+fmul    {z20.h-z23.h}, {z8.h-z11.h}, {z20.h-z23.h}  // 11000001-01110101-11100101-00010100
+// CHECK-INST: fmul    { z20.h - z23.h }, { z8.h - z11.h }, { z20.h - z23.h }
+// CHECK-ENCODING: [0x14,0xe5,0x75,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c175e514 <unknown>
+
+fmul    {z28.h-z31.h}, {z28.h-z31.h}, {z28.h-z31.h}  // 11000001-01111101-11100111-10011100
+// CHECK-INST: fmul    { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h }
+// CHECK-ENCODING: [0x9c,0xe7,0x7d,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c17de79c <unknown>
+
+// 32-bit elements
+
+fmul    {z0.s-z3.s}, {z0.s-z3.s}, {z0.s-z3.s}  // 11000001-10100001-11100100-00000000
+// CHECK-INST: fmul    { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s }
+// CHECK-ENCODING: [0x00,0xe4,0xa1,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1a1e400 <unknown>
+
+fmul    {z20.s-z23.s}, {z8.s-z11.s}, {z20.s-z23.s}  // 11000001-10110101-11100101-00010100
+// CHECK-INST: fmul    { z20.s - z23.s }, { z8.s - z11.s }, { z20.s - z23.s }
+// CHECK-ENCODING: [0x14,0xe5,0xb5,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1b5e514 <unknown>
+
+fmul    {z28.s-z31.s}, {z28.s-z31.s}, {z28.s-z31.s}  // 11000001-10111101-11100111-10011100
+// CHECK-INST: fmul    { z28.s - z31.s }, { z28.s - z31.s }, { z28.s - z31.s }
+// CHECK-ENCODING: [0x9c,0xe7,0xbd,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1bde79c <unknown>
+
+// 64-bit elements
+
+fmul    {z0.d-z3.d}, {z0.d-z3.d}, {z0.d-z3.d}  // 11000001-11100001-11100100-00000000
+// CHECK-INST: fmul    { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d }
+// CHECK-ENCODING: [0x00,0xe4,0xe1,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1e1e400 <unknown>
+
+fmul    {z20.d-z23.d}, {z8.d-z11.d}, {z20.d-z23.d}  // 11000001-11110101-11100101-00010100
+// CHECK-INST: fmul    { z20.d - z23.d }, { z8.d - z11.d }, { z20.d - z23.d }
+// CHECK-ENCODING: [0x14,0xe5,0xf5,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1f5e514 <unknown>
+
+fmul    {z28.d-z31.d}, {z28.d-z31.d}, {z28.d-z31.d}  // 11000001-11111101-11100111-10011100
+// CHECK-INST: fmul    { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d }
+// CHECK-ENCODING: [0x9c,0xe7,0xfd,0xc1]
+// CHECK-ERROR: instruction requires: sme2p2
+// CHECK-UNKNOWN: c1fde79c <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/bfcvt-diagnostics.s b/llvm/test/MC/AArch64/SVE/bfcvt-diagnostics.s
index 013f15f8b6e0050..6c55ebe4088ff11 100644
--- a/llvm/test/MC/AArch64/SVE/bfcvt-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/bfcvt-diagnostics.s
@@ -11,7 +11,7 @@ bfcvt z0.h, p0/m, z1.h
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
 bfcvt z0.h, p0/z, z1.s
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2
 // CHECK-NEXT: bfcvt z0.h, p0/z, z1.s
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
diff --git a/llvm/test/MC/AArch64/SVE/compact-diagnostics.s b/llvm/test/MC/AArch64/SVE/compact-diagnostics.s
index a3d86267d917b53..b8ff8cc46201f28 100644
--- a/llvm/test/MC/AArch64/SVE/compact-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/compact-diagnostics.s
@@ -28,12 +28,12 @@ compact z31.s, p7, z31.d
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
 compact z31.b, p7, z31.b
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2
 // CHECK-NEXT: compact z31.b, p7, z31.b
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
 compact z31.h, p7, z31.h
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2
 // CHECK-NEXT: compact z31.h, p7, z31.h
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
diff --git a/llvm/test/MC/AArch64/SVE/compact.s b/llvm/test/MC/AArch64/SVE/compact.s
index ff815980781d79e..a9b47dea246bee9 100644
--- a/llvm/test/MC/AArch64/SVE/compact.s
+++ b/llvm/test/MC/AArch64/SVE/compact.s
@@ -12,11 +12,11 @@
 compact z31.s, p7, z31.s
 // CHECK-INST: compact z31.s, p7, z31.s
 // CHECK-ENCODING: [0xff,0x9f,0xa1,0x05]
-// CHECK-ERROR: instruction requires: sve
+// CHECK-ERROR: instruction requires: sve or sme2p2
 // CHECK-UNKNOWN: 05a19fff <unknown>
 
 compact z31.d, p7, z31.d
 // CHECK-INST: compact z31.d, p7, z31.d
 // CHECK-ENCODING: [0xff,0x9f,0xe1,0x05]
-// CHECK-ERROR: instruction requires: sve
+// CHECK-ERROR: instruction requires: sve or sme2p2
 // CHECK-UNKNOWN: 05e19fff <unknown>
diff --git a/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s
index 8ae4d4992844239..2fe43f7aa8444cc 100644
--- a/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/matrix-multiply-fp-diagnostics.s
@@ -3,11 +3,6 @@
 // --------------------------------------------------------------------------//
 // FMMLA (SVE)
 
-// Invalid element size
-
-fmmla z0.h, z1.h, z2.h
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
-
 // Mis-matched element size
 
 fmmla z0.d, z1.s, z2.s
diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s b/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s
index 858aaf9d13ecc4e..966bead071fe395 100644
--- a/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s
+++ b/llvm/test/MC/AArch64/SVE2/directive-arch-negative.s
@@ -30,8 +30,26 @@ bgrp z21.s, z10.s, z21.s
 // CHECK: error: instruction requires: sve2-bitperm
 // CHECK-NEXT: bgrp z21.s, z10.s, z21.s
 
+.arch armv9-a+f8f16mm
+.arch armv9-a+nof8f16mm
+fmmla   z23.h, z13.b, z8.b
+// CHECK: error: instruction requires: f8f16mm
+// CHECK-NEXT: fmmla   z23.h, z13.b, z8.b
+
+.arch armv9-a+f8f32mm
+.arch armv9-a+nof8f32mm
+fmmla   z23.s, z13.b, z8.b
+// CHECK: error: instruction requires: f8f32mm
+// CHECK-NEXT: fmmla   z23.s, z13.b, z8.b
+
+.arch armv9-a+sve-f16f32mm
+.arch armv9-a+nosve-f16f32mm
+fmmla   z23.s, z13.h, z8.h
+// CHECK: error: instruction requires: sve-f16f32mm
+// CHECK-NEXT: fmmla   z23.s, z13.h, z8.h
+
 .arch armv9-a+sve-bfscale
 .arch armv9-a+nosve-bfscale
 bfscale z0.h, p0/m, z0.h, z0.h
 // CHECK: error: instruction requires: sve-bfscale
-// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h
\ No newline at end of file
+// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h
diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch.s b/llvm/test/MC/AArch64/SVE2/directive-arch.s
index b9710b67f8a1d0a..99f6198a60abbcd 100644
--- a/llvm/test/MC/AArch64/SVE2/directive-arch.s
+++ b/llvm/test/MC/AArch64/SVE2/directive-arch.s
@@ -20,6 +20,18 @@ rax1 z0.d, z0.d, z0.d
 bgrp z21.s, z10.s, z21.s
 // CHECK: bgrp z21.s, z10.s, z21.s
 
+.arch armv9-a+f8f16mm
+fmmla   z23.h, z13.b, z8.b
+// CHECK: fmmla   z23.h, z13.b, z8.b
+
+.arch armv9-a+f8f32mm
+fmmla   z23.s, z13.b, z8.b
+// CHECK: fmmla   z23.s, z13.b, z8.b
+
+.arch armv9-a+sve-f16f32mm
+fmmla   z23.s, z13.h, z8.h
+// CHECK: fmmla   z23.s, z13.h, z8.h
+
 .arch armv9-a+sve-bfscale
 bfscale z0.h, p0/m, z0.h, z0.h
-// CHECK: bfscale z0.h, p0/m, z0.h, z0.h
\ No newline at end of file
+// CHECK: bfscale z0.h, p0/m, z0.h, z0.h
diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s b/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s
index bd625d252626921..e967f5aa60bd738 100644
--- a/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s
+++ b/llvm/test/MC/AArch64/SVE2/directive-arch_extension-negative.s
@@ -30,8 +30,26 @@ bgrp z21.s, z10.s, z21.s
 // CHECK: error: instruction requires: sve2-bitperm
 // CHECK-NEXT: bgrp z21.s, z10.s, z21.s
 
+.arch_extension f8f16mm
+.arch_extension nof8f16mm
+fmmla   z23.h, z13.b, z8.b
+// CHECK: error: instruction requires: f8f16mm
+// CHECK-NEXT: fmmla   z23.h, z13.b, z8.b
+
+.arch_extension f8f32mm
+.arch_extension nof8f32mm
+fmmla   z23.s, z13.b, z8.b
+// CHECK: error: instruction requires: f8f32mm
+// CHECK-NEXT: fmmla   z23.s, z13.b, z8.b
+
+.arch_extension sve-f16f32mm
+.arch_extension nosve-f16f32mm
+fmmla   z23.s, z13.h, z8.h
+// CHECK: error: instruction requires: sve-f16f32mm
+// CHECK-NEXT: fmmla   z23.s, z13.h, z8.h
+
 .arch_extension sve-bfscale
 .arch_extension nosve-bfscale
 bfscale z0.h, p0/m, z0.h, z0.h
 // CHECK: error: instruction requires: sve-bfscale
-// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h
\ No newline at end of file
+// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h
diff --git a/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s b/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s
index 28d803bf7cc88c2..2fdbb525464d90f 100644
--- a/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s
+++ b/llvm/test/MC/AArch64/SVE2/directive-arch_extension.s
@@ -20,6 +20,18 @@ rax1 z0.d, z0.d, z0.d
 bgrp z21.s, z10.s, z21.s
 // CHECK: bgrp z21.s, z10.s, z21.s
 
+.arch_extension f8f16mm
+fmmla   z23.h, z13.b, z8.b
+// CHECK: fmmla   z23.h, z13.b, z8.b
+
+.arch_extension f8f32mm
+fmmla   z23.s, z13.b, z8.b
+// CHECK: fmmla   z23.s, z13.b, z8.b
+
+.arch_extension sve-f16f32mm
+fmmla   z23.s, z13.h, z8.h
+// CHECK: fmmla   z23.s, z13.h, z8.h
+
 .arch_extension sve-bfscale
 bfscale z0.h, p0/m, z0.h, z0.h
-// CHECK: bfscale z0.h, p0/m, z0.h, z0.h
\ No newline at end of file
+// CHECK: bfscale z0.h, p0/m, z0.h, z0.h
diff --git a/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s b/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s
index 45a04a58eac3b49..9a8af638b703780 100644
--- a/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s
+++ b/llvm/test/MC/AArch64/SVE2/directive-cpu-negative.s
@@ -30,8 +30,26 @@ bgrp z21.s, z10.s, z21.s
 // CHECK: error: instruction requires: sve2-bitperm
 // CHECK-NEXT: bgrp z21.s, z10.s, z21.s
 
+.cpu generic+sve2+f8f16mm
+.cpu generic+sve2+nof8f16mm
+fmmla   z23.h, z13.b, z8.b
+// CHECK: error: instruction requires: f8f16mm
+// CHECK-NEXT: fmmla   z23.h, z13.b, z8.b
+
+.cpu generic+sve2+f8f32mm
+.cpu generic+sve2+nof8f32mm
+fmmla   z23.s, z13.b, z8.b
+// CHECK: error: instruction requires: f8f32mm
+// CHECK-NEXT: fmmla   z23.s, z13.b, z8.b
+
+.cpu generic+sve-f16f32mm
+.cpu generic+nosve-f16f32mm
+fmmla   z23.s, z13.h, z8.h
+// CHECK: error: instruction requires: sve-f16f32mm
+// CHECK-NEXT: fmmla   z23.s, z13.h, z8.h
+
 .cpu generic+sve-bfscale
 .cpu generic+nosve-bfscale
 bfscale z0.h, p0/m, z0.h, z0.h
 // CHECK: error: instruction requires: sve-bfscale
-// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h
\ No newline at end of file
+// CHECK-NEXT: bfscale z0.h, p0/m, z0.h, z0.h
diff --git a/llvm/test/MC/AArch64/SVE2/directive-cpu.s b/llvm/test/MC/AArch64/SVE2/directive-cpu.s
index 75d2321bf620779..daa5ec510b226ae 100644
--- a/llvm/test/MC/AArch64/SVE2/directive-cpu.s
+++ b/llvm/test/MC/AArch64/SVE2/directive-cpu.s
@@ -20,6 +20,18 @@ rax1 z0.d, z0.d, z0.d
 bgrp z21.s, z10.s, z21.s
 // CHECK: bgrp z21.s, z10.s, z21.s
 
+.cpu generic+sve2+f8f16mm
+fmmla   z23.h, z13.b, z8.b
+// CHECK: fmmla   z23.h, z13.b, z8.b
+
+.cpu generic+sve2+f8f32mm
+fmmla   z23.s, z13.b, z8.b
+// CHECK: fmmla   z23.s, z13.b, z8.b
+
+.cpu generic+sve-f16f32mm
+fmmla   z23.s, z13.h, z8.h
+// CHECK: fmmla   z23.s, z13.h, z8.h
+
 .cpu generic+sve-bfscale
 bfscale z0.h, p0/m, z0.h, z0.h
-// CHECK: bfscale z0.h, p0/m, z0.h, z0.h
\ No newline at end of file
+// CHECK: bfscale z0.h, p0/m, z0.h, z0.h
diff --git a/llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s
index 5f365496033633a..36c5d5fe9cbea6a 100644
--- a/llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE2/fcvtx-diagnostics.s
@@ -29,7 +29,7 @@ fcvtx    z0.d, p0/m, z0.d
 // Invalid predicate operation
 
 fcvtx   z0.s, p0/z, z0.d
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2
 // CHECK-NEXT: fcvtx   z0.s, p0/z, z0.d
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
diff --git a/llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s b/llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s
index ddb8c4ff35b6a31..60d9f9e5e242d58 100644
--- a/llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE2/flogb-diagnostics.s
@@ -14,7 +14,7 @@ flogb   z0.b, p0/m, z0.b
 // Invalid predicate operation
 
 flogb   z0.s, p0/z, z0.s
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2
 // CHECK-NEXT: flogb   z0.s, p0/z, z0.s
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm-diagnostics.s
new file mode 100644
index 000000000000000..924c123f0ca5bc9
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm-diagnostics.s
@@ -0,0 +1,18 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-f16f32mm  2>&1 < %s | FileCheck %s
+
+// --------------------------------------------------------------------------//
+// FMMLA (SVE)
+
+// Invalid element size
+
+fmmla z0.s, z1.b, z2.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f8f32mm
+fmmla z0.d, z1.h, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+
+// Mis-matched element size
+
+fmmla z0.s, z1.h, z2.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+fmmla z0.s, z1.d, z2.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s b/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s
new file mode 100644
index 000000000000000..84efcfe8b12e48b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmmla-f16f32mm.s
@@ -0,0 +1,41 @@
+
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+sve-f16f32mm < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+sve-f16f32mm < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve,+sve-f16f32mm - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve,+sve-f16f32mm < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve,+sve-f16f32mm < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve,+sve-f16f32mm -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+
+movprfx z23, z31
+fmmla   z23.s, z13.h, z8.h  // 01100100-00101000-11100101-10110111
+// CHECK-INST:  movprfx z23, z31
+// CHECK-INST: fmmla   z23.s, z13.h, z8.h
+// CHECK-ENCODING: [0xb7,0xe5,0x28,0x64]
+// CHECK-ERROR: instruction requires: sve-f16f32mm
+// CHECK-UNKNOWN: 6428e5b7 <unknown>
+
+fmmla   z0.s, z0.h, z0.h  // 01100100-00100000-11100100-00000000
+// CHECK-INST: fmmla   z0.s, z0.h, z0.h
+// CHECK-ENCODING: [0x00,0xe4,0x20,0x64]
+// CHECK-ERROR: instruction requires: sve-f16f32mm
+// CHECK-UNKNOWN: 6420e400 <unknown>
+
+fmmla   z23.s, z13.h, z8.h  // 01100100-00101000-11100101-10110111
+// CHECK-INST: fmmla   z23.s, z13.h, z8.h
+// CHECK-ENCODING: [0xb7,0xe5,0x28,0x64]
+// CHECK-ERROR: instruction requires: sve-f16f32mm
+// CHECK-UNKNOWN: 6428e5b7 <unknown>
+
+fmmla   z31.s, z31.h, z31.h  // 01100100-00111111-11100111-11111111
+// CHECK-INST: fmmla   z31.s, z31.h, z31.h
+// CHECK-ENCODING: [0xff,0xe7,0x3f,0x64]
+// CHECK-ERROR: instruction requires: sve-f16f32mm
+// CHECK-UNKNOWN: 643fe7ff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm-diagnostics.s
new file mode 100644
index 000000000000000..59818d2d24a481c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm-diagnostics.s
@@ -0,0 +1,24 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f16mm   2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+fmmla   z21.b, z10.b, z21.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmmla   z21.b, z10.b, z21.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.d, z10.b, z21.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmmla   z21.d, z10.b, z21.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.s, z10.h, z21.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sve-f16f32mm
+// CHECK-NEXT: fmmla   z21.s, z10.h, z21.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.s, z10.s, z21.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f32mm
+// CHECK-NEXT: fmmla   z21.s, z10.s, z21.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s b/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s
new file mode 100644
index 000000000000000..ff343548993cfe5
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmmla-f8f16mm.s
@@ -0,0 +1,39 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f16mm < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+f8f16mm < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2,+f8f16mm - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+f8f16mm < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f16mm < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+f8f16mm -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+movprfx z23, z31
+fmmla   z23.h, z13.b, z8.b  // 01100100-01101000-11100001-10110111
+// CHECK-INST:  movprfx z23, z31
+// CHECK-INST: fmmla   z23.h, z13.b, z8.b
+// CHECK-ENCODING: [0xb7,0xe1,0x68,0x64]
+// CHECK-ERROR: instruction requires: f8f16mm sve2
+// CHECK-UNKNOWN: 6468e1b7 <unknown>
+
+fmmla   z0.h, z0.b, z0.b  // 01100100-01100000-11100000-00000000
+// CHECK-INST: fmmla   z0.h, z0.b, z0.b
+// CHECK-ENCODING: [0x00,0xe0,0x60,0x64]
+// CHECK-ERROR: instruction requires: f8f16mm sve2
+// CHECK-UNKNOWN: 6460e000 <unknown>
+
+fmmla   z21.h, z10.b, z21.b  // 01100100-01110101-11100001-01010101
+// CHECK-INST: fmmla   z21.h, z10.b, z21.b
+// CHECK-ENCODING: [0x55,0xe1,0x75,0x64]
+// CHECK-ERROR: instruction requires: f8f16mm sve2
+// CHECK-UNKNOWN: 6475e155 <unknown>
+
+fmmla   z31.h, z31.b, z31.b  // 01100100-01111111-11100011-11111111
+// CHECK-INST: fmmla   z31.h, z31.b, z31.b
+// CHECK-ENCODING: [0xff,0xe3,0x7f,0x64]
+// CHECK-ERROR: instruction requires: f8f16mm sve2
+// CHECK-UNKNOWN: 647fe3ff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm-diagnostics.s b/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm-diagnostics.s
new file mode 100644
index 000000000000000..0b1eb1b24e26438
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm-diagnostics.s
@@ -0,0 +1,30 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f32mm   2>&1 < %s| FileCheck %s
+
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+fmmla   z21.b, z10.b, z21.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmmla   z21.b, z10.b, z21.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.h, z10.b, z21.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f8f16mm
+// CHECK-NEXT: fmmla   z21.h, z10.b, z21.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.d, z10.b, z21.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fmmla   z21.d, z10.b, z21.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.s, z10.h, z21.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sve-f16f32mm
+// CHECK-NEXT: fmmla   z21.s, z10.h, z21.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fmmla   z21.s, z10.s, z21.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: f32mm
+// CHECK-NEXT: fmmla   z21.s, z10.s, z21.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s b/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s
new file mode 100644
index 000000000000000..8b59a112dc61b4c
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2/fmmla-f8f32mm.s
@@ -0,0 +1,39 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f32mm < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+f8f32mm < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2,+f8f32mm - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+f8f32mm < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+f8f32mm < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+f8f32mm -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+movprfx z23, z31
+fmmla   z23.s, z13.b, z8.b  // 01100100-00101000-11100001-10110111
+// CHECK-INST:  movprfx z23, z31
+// CHECK-INST: fmmla   z23.s, z13.b, z8.b
+// CHECK-ENCODING: [0xb7,0xe1,0x28,0x64]
+// CHECK-ERROR: instruction requires: f8f32mm sve2
+// CHECK-UNKNOWN: 6428e1b7 <unknown>
+
+fmmla   z0.s, z0.b, z0.b  // 01100100-00100000-11100000-00000000
+// CHECK-INST: fmmla   z0.s, z0.b, z0.b
+// CHECK-ENCODING: [0x00,0xe0,0x20,0x64]
+// CHECK-ERROR: instruction requires: f8f32mm sve2
+// CHECK-UNKNOWN: 6420e000 <unknown>
+
+fmmla   z21.s, z10.b, z21.b  // 01100100-00110101-11100001-01010101
+// CHECK-INST: fmmla   z21.s, z10.b, z21.b
+// CHECK-ENCODING: [0x55,0xe1,0x35,0x64]
+// CHECK-ERROR: instruction requires: f8f32mm sve2
+// CHECK-UNKNOWN: 6435e155 <unknown>
+
+fmmla   z31.s, z31.b, z31.b  // 01100100-00111111-11100011-11111111
+// CHECK-INST: fmmla   z31.s, z31.b, z31.b
+// CHECK-ENCODING: [0xff,0xe3,0x3f,0x64]
+// CHECK-ERROR: instruction requires: f8f32mm sve2
+// CHECK-UNKNOWN: 643fe3ff <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2/sqabs-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqabs-diagnostics.s
index 8b3a136a7d74293..7dd268dd7cddd19 100644
--- a/llvm/test/MC/AArch64/SVE2/sqabs-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE2/sqabs-diagnostics.s
@@ -4,7 +4,7 @@
 // Invalid predicate
 
 sqabs z0.s, p0/z, z1.s
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2
 // CHECK-NEXT: sqabs z0.s, p0/z, z1.s
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
diff --git a/llvm/test/MC/AArch64/SVE2/sqneg-diagnostics.s b/llvm/test/MC/AArch64/SVE2/sqneg-diagnostics.s
index 7b0f5722b94a3fa..372adad0427c092 100644
--- a/llvm/test/MC/AArch64/SVE2/sqneg-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE2/sqneg-diagnostics.s
@@ -4,7 +4,7 @@
 // Invalid predicate
 
 sqneg z0.s, p0/z, z1.s
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2
 // CHECK-NEXT: sqneg z0.s, p0/z, z1.s
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
diff --git a/llvm/test/MC/AArch64/SVE2/urecpe-diagnostics.s b/llvm/test/MC/AArch64/SVE2/urecpe-diagnostics.s
index f04538494cd6f05..73bb6cecffa5a41 100644
--- a/llvm/test/MC/AArch64/SVE2/urecpe-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE2/urecpe-diagnostics.s
@@ -4,7 +4,7 @@
 // Invalid predicate
 
 urecpe z0.s, p0/z, z1.s
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2
 // CHECK-NEXT: urecpe z0.s, p0/z, z1.s
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
diff --git a/llvm/test/MC/AArch64/SVE2/ursqrte-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ursqrte-diagnostics.s
index 2190ff1ebd82ba4..0c6746a4a7c1fe3 100644
--- a/llvm/test/MC/AArch64/SVE2/ursqrte-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE2/ursqrte-diagnostics.s
@@ -4,7 +4,7 @@
 // Invalid predicate
 
 ursqrte z0.s, p0/z, z1.s
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2
 // CHECK-NEXT: ursqrte z0.s, p0/z, z1.s
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
diff --git a/llvm/test/MC/AArch64/SVE2p1/pmlal-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/pmlal-diagnostics.s
new file mode 100644
index 000000000000000..61c2b6eff969d74
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p1/pmlal-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+sve-aes2 2>&1 < %s | FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid vector list
+
+pmlal   {z0.q-z2.q}, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: pmlal   {z0.q-z2.q}, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmlal   {z0.q-z0.q}, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: pmlal   {z0.q-z0.q}, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmlal   {z1.q-z2.q}, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+// CHECK-NEXT: pmlal   {z1.q-z2.q}, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmlal   {z0.d-z1.d}, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: pmlal   {z0.d-z1.d}, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid single source vectors
+
+pmlal   {z0.q-z1.q}, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmlal   {z0.q-z1.q}, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmlal   {z0.q-z1.q}, z0.d, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmlal   {z0.q-z1.q}, z0.d, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p1/pmlal.s b/llvm/test/MC/AArch64/SVE2p1/pmlal.s
new file mode 100644
index 000000000000000..0420b230956c08b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p1/pmlal.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-aes2,+sve2p1 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-aes2,+ssve-aes < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve-aes2,+sve2p1 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve-aes2,+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve-aes2,+sve2p1 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve-aes2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-aes2,+sve2p1 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve-aes2,+sve2p1 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+pmlal   {z0.q-z1.q}, z0.d, z0.d  // 01000101-00100000-11111100-00000000
+// CHECK-INST: pmlal   { z0.q, z1.q }, z0.d, z0.d
+// CHECK-ENCODING: [0x00,0xfc,0x20,0x45]
+// CHECK-ERROR: instruction requires: sve2p1 or ssve-aes sve-aes2
+// CHECK-UNKNOWN: 4520fc00 <unknown>
+
+pmlal   {z22.q-z23.q}, z13.d, z8.d  // 01000101-00101000-11111101-10110110
+// CHECK-INST: pmlal   { z22.q, z23.q }, z13.d, z8.d
+// CHECK-ENCODING: [0xb6,0xfd,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2p1 or ssve-aes sve-aes2
+// CHECK-UNKNOWN: 4528fdb6 <unknown>
+
+pmlal   {z30.q-z31.q}, z31.d, z31.d  // 01000101-00111111-11111111-11111110
+// CHECK-INST: pmlal   { z30.q, z31.q }, z31.d, z31.d
+// CHECK-ENCODING: [0xfe,0xff,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2p1 or ssve-aes sve-aes2
+// CHECK-UNKNOWN: 453ffffe <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p1/pmull-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/pmull-diagnostics.s
new file mode 100644
index 000000000000000..3aaef0cddf4a070
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p1/pmull-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+sve-aes2 2>&1 < %s | FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid vector list
+
+pmull   {z0.q-z2.q}, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: pmull   {z0.q-z2.q}, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmull   {z0.q-z0.q}, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors
+// CHECK-NEXT: pmull   {z0.q-z0.q}, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmull   {z1.q-z2.q}, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types
+// CHECK-NEXT: pmull   {z1.q-z2.q}, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmull   {z0.d-z1.d}, z0.d, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: pmull   {z0.d-z1.d}, z0.d, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid single source vectors
+
+pmull   {z0.q-z1.q}, z0.s, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmull   {z0.q-z1.q}, z0.s, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+pmull   {z0.q-z1.q}, z0.d, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: pmull   {z0.q-z1.q}, z0.d, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p1/pmull.s b/llvm/test/MC/AArch64/SVE2p1/pmull.s
new file mode 100644
index 000000000000000..9c3ee16401c1215
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p1/pmull.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-aes2,+sve2p1 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-aes2,+ssve-aes < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve-aes2,+sve2p1 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve-aes2,+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve-aes2,+sve2p1 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve-aes2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve-aes2,+sve2p1 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve-aes2,+sve2p1 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+pmull   {z0.q-z1.q}, z0.d, z0.d  // 01000101-00100000-11111000-00000000
+// CHECK-INST: pmull   { z0.q, z1.q }, z0.d, z0.d
+// CHECK-ENCODING: [0x00,0xf8,0x20,0x45]
+// CHECK-ERROR: instruction requires: sve2p1 or ssve-aes sve-aes2
+// CHECK-UNKNOWN: 4520f800 <unknown>
+
+pmull   {z22.q-z23.q}, z13.d, z8.d  // 01000101-00101000-11111001-10110110
+// CHECK-INST: pmull   { z22.q, z23.q }, z13.d, z8.d
+// CHECK-ENCODING: [0xb6,0xf9,0x28,0x45]
+// CHECK-ERROR: instruction requires: sve2p1 or ssve-aes sve-aes2
+// CHECK-UNKNOWN: 4528f9b6 <unknown>
+
+pmull   {z30.q-z31.q}, z31.d, z31.d  // 01000101-00111111-11111011-11111110
+// CHECK-INST: pmull   { z30.q, z31.q }, z31.d, z31.d
+// CHECK-ENCODING: [0xfe,0xfb,0x3f,0x45]
+// CHECK-ERROR: instruction requires: sve2p1 or ssve-aes sve-aes2
+// CHECK-UNKNOWN: 453ffbfe <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/bfcvt_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/bfcvt_z-diagnostics.s
new file mode 100644
index 000000000000000..30be5d19c4aae76
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/bfcvt_z-diagnostics.s
@@ -0,0 +1,60 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand
+
+bfcvt z0.b, p0/z, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: bfcvt z0.b, p0/z, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+bfcvt z0.h, p0/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfcvt z0.h, p0/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfcvt z0.h, p0/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfcvt z0.h, p0/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfcvt z0.s, p0/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfcvt z0.s, p0/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfcvt z0.s, p0/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfcvt z0.s, p0/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+bfcvt z0.d, p0/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: bfcvt z0.d, p0/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+bfcvt    z0.h, p8/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: bfcvt    z0.h, p8/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/m, z7.s
+bfcvt z0.h, p7/z, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bfcvt z0.h, p7/z, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+bfcvt z0.h, p7/z, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bfcvt z0.h, p7/z, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2p2/bfcvt_z.s b/llvm/test/MC/AArch64/SVE2p2/bfcvt_z.s
new file mode 100644
index 000000000000000..9d63ebf1e830985
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/bfcvt_z.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+bfcvt   z0.h, p0/z, z0.s  // 01100100-10011010-11000000-00000000
+// CHECK-INST: bfcvt   z0.h, p0/z, z0.s
+// CHECK-ENCODING: [0x00,0xc0,0x9a,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 649ac000 <unknown>
+
+bfcvt   z21.h, p5/z, z10.s  // 01100100-10011010-11010101-01010101
+// CHECK-INST: bfcvt   z21.h, p5/z, z10.s
+// CHECK-ENCODING: [0x55,0xd5,0x9a,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 649ad555 <unknown>
+
+bfcvt   z31.h, p7/z, z31.s  // 01100100-10011010-11011111-11111111
+// CHECK-INST: bfcvt   z31.h, p7/z, z31.s
+// CHECK-ENCODING: [0xff,0xdf,0x9a,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 649adfff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/compact-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/compact-diagnostics.s
new file mode 100644
index 000000000000000..acf00e7f7a600fb
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/compact-diagnostics.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2  2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid element widths
+
+compact z31.h, p7, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: compact z31.h, p7, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+compact z31.b, p7, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: compact z31.b, p7, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+compact z23.b, p7/m, z13.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: compact z23.b, p7/m, z13.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+compact z23.b, p7.b, z13.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: compact z23.b, p7.b, z13.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+compact z23.h, p7/z, z13.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: compact z23.h, p7/z, z13.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+compact z23.h, p7.h, z13.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: compact z23.h, p7.h, z13.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+compact z23.b, p8, z13.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: compact z23.b, p8, z13.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+compact z23.h, p8, z13.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: compact z23.h, p8, z13.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31.b, p7/z, z6.b
+compact z31.b, p7, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: compact z31.b, p7, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31, z6
+compact z31.h, p7, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: compact z31.h, p7, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2p2/compact.s b/llvm/test/MC/AArch64/SVE2p2/compact.s
new file mode 100644
index 000000000000000..0170b3832bea674
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/compact.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+compact z0.b, p0, z0.b  // 00000101-00100001-10000000-00000000
+// CHECK-INST: compact z0.b, p0, z0.b
+// CHECK-ENCODING: [0x00,0x80,0x21,0x05]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 05218000 <unknown>
+
+compact z21.b, p5, z10.b  // 00000101-00100001-10010101-01010101
+// CHECK-INST: compact z21.b, p5, z10.b
+// CHECK-ENCODING: [0x55,0x95,0x21,0x05]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 05219555 <unknown>
+
+compact z31.h, p7, z31.h  // 00000101-01100001-10011111-11111111
+// CHECK-INST: compact z31.h, p7, z31.h
+// CHECK-ENCODING: [0xff,0x9f,0x61,0x05]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 05619fff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/expand-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/expand-diagnostics.s
new file mode 100644
index 000000000000000..b9a95f399a168a7
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/expand-diagnostics.s
@@ -0,0 +1,120 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2  2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid element widths.
+
+expand  z23.b, p3, z13.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: expand  z23.b, p3, z13.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.h, p3, z13.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: expand  z23.h, p3, z13.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.s, p3, z13.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: expand  z23.s, p3, z13.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.d, p3, z13.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: expand  z23.d, p3, z13.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.q, p3, z13.q
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: expand  z23.q, p3, z13.q
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid predicate operation
+
+expand  z23.b, p3/z, z13.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: expand  z23.b, p3/z, z13.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.b, p3.b, z13.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: expand  z23.b, p3.b, z13.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.h, p3/m, z13.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: expand  z23.h, p3/m, z13.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.h, p3.h, z13.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: expand  z23.h, p3.h, z13.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.s, p3/z, z13.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: expand  z23.s, p3/z, z13.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.s, p3.s, z13.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: expand  z23.s, p3.s, z13.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.d, p3/m, z13.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: expand  z23.d, p3/m, z13.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.d, p3.d, z13.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: expand  z23.d, p3.d, z13.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+expand  z23.b, p8, z13.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: expand  z23.b, p8, z13.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.b, p3.b, z13.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: expand  z23.b, p3.b, z13.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.h, p8, z13.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: expand  z23.h, p8, z13.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.h, p3.h, z13.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: expand  z23.h, p3.h, z13.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.s, p8, z13.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: expand  z23.s, p8, z13.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+expand  z23.d, p8, z13.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: expand  z23.d, p8, z13.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z31, z6
+expand  z31.b, p7, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: expand  z31.b, p7, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z31.b, p0/z, z6.b
+expand  z31.b, p0, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: expand  z31.b, p0, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2p2/expand.s b/llvm/test/MC/AArch64/SVE2p2/expand.s
new file mode 100644
index 000000000000000..7523978380fbd7e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/expand.s
@@ -0,0 +1,39 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+expand  z0.b, p0, z0.b  // 00000101-00110001-10000000-00000000
+// CHECK-INST: expand  z0.b, p0, z0.b
+// CHECK-ENCODING: [0x00,0x80,0x31,0x05]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 05318000 <unknown>
+
+expand  z21.h, p5, z10.h  // 00000101-01110001-10010101-01010101
+// CHECK-INST: expand  z21.h, p5, z10.h
+// CHECK-ENCODING: [0x55,0x95,0x71,0x05]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 05719555 <unknown>
+
+expand  z23.s, p3, z13.s  // 00000101-10110001-10001101-10110111
+// CHECK-INST: expand  z23.s, p3, z13.s
+// CHECK-ENCODING: [0xb7,0x8d,0xb1,0x05]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 05b18db7 <unknown>
+
+expand  z31.d, p7, z31.d  // 00000101-11110001-10011111-11111111
+// CHECK-INST: expand  z31.d, p7, z31.d
+// CHECK-ENCODING: [0xff,0x9f,0xf1,0x05]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 05f19fff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvt_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/fcvt_z-diagnostics.s
new file mode 100644
index 000000000000000..37f4a0ffbe6a2af
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/fcvt_z-diagnostics.s
@@ -0,0 +1,50 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid operand
+
+fcvt    z0.b, p0/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: fcvt    z0.b, p0/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+fcvt    z0.h, p0/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvt    z0.h, p0/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvt    z0.s, p0/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvt    z0.s, p0/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvt    z0.d, p0/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvt    z0.d, p0/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+fcvt    z0.s, p8/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: fcvt    z0.s, p8/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/m, z7.s
+fcvt z0.s, p7/z, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvt z0.s, p7/z, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcvt z0.s, p7/z, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvt z0.s, p7/z, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvt_z.s b/llvm/test/MC/AArch64/SVE2p2/fcvt_z.s
new file mode 100644
index 000000000000000..6cd9f1ba503210e
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/fcvt_z.s
@@ -0,0 +1,57 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// convert to half
+
+fcvt    z0.h, p0/z, z0.s  // 01100100-10011010-10000000-00000000
+// CHECK-INST: fcvt    z0.h, p0/z, z0.s
+// CHECK-ENCODING: [0x00,0x80,0x9a,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 649a8000 <unknown>
+
+fcvt    z23.h, p3/z, z13.d  // 01100100-11011010-10001101-10110111
+// CHECK-INST: fcvt    z23.h, p3/z, z13.d
+// CHECK-ENCODING: [0xb7,0x8d,0xda,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64da8db7 <unknown>
+
+// convert to single
+
+fcvt    z0.s, p0/z, z0.h  // 01100100-10011010-10100000-00000000
+// CHECK-INST: fcvt    z0.s, p0/z, z0.h
+// CHECK-ENCODING: [0x00,0xa0,0x9a,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 649aa000 <unknown>
+
+fcvt    z31.s, p7/z, z31.d  // 01100100-11011010-11011111-11111111
+// CHECK-INST: fcvt    z31.s, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xdf,0xda,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64dadfff <unknown>
+
+// convert to double
+
+fcvt    z21.d, p5/z, z10.h  // 01100100-11011010-10110101-01010101
+// CHECK-INST: fcvt    z21.d, p5/z, z10.h
+// CHECK-ENCODING: [0x55,0xb5,0xda,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64dab555 <unknown>
+
+fcvt    z31.d, p7/z, z31.s  // 01100100-11011010-11111111-11111111
+// CHECK-INST: fcvt    z31.d, p7/z, z31.s
+// CHECK-ENCODING: [0xff,0xff,0xda,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64daffff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvtx_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/fcvtx_z-diagnostics.s
new file mode 100644
index 000000000000000..d5876773004fda9
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/fcvtx_z-diagnostics.s
@@ -0,0 +1,57 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+fcvtx z0.b, p0/z, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtx z0.b, p0/z, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtx z0.h, p0/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtx z0.h, p0/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtx z0.s, p0/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtx z0.s, p0/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtx z0.d, p0/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtx z0.d, p0/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtx z0.h, p0/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtx z0.h, p0/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtx z0.b, p0/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtx z0.b, p0/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Predicate not in restricted predicate range
+
+fcvtx    z0.s, p8/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: fcvtx    z0.s, p8/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/m, z7.s
+fcvtx z0.s, p7/z, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvtx z0.s, p7/z, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcvtx z0.s, p7/z, z1.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvtx z0.s, p7/z, z1.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvtx_z.s b/llvm/test/MC/AArch64/SVE2p2/fcvtx_z.s
new file mode 100644
index 000000000000000..e5e2155ea5d8785
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/fcvtx_z.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fcvtx   z0.s, p0/z, z0.d  // 01100100-00011010-11000000-00000000
+// CHECK-INST: fcvtx   z0.s, p0/z, z0.d
+// CHECK-ENCODING: [0x00,0xc0,0x1a,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 641ac000 <unknown>
+
+fcvtx   z23.s, p3/z, z13.d  // 01100100-00011010-11001101-10110111
+// CHECK-INST: fcvtx   z23.s, p3/z, z13.d
+// CHECK-ENCODING: [0xb7,0xcd,0x1a,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 641acdb7 <unknown>
+
+fcvtx   z31.s, p7/z, z31.d  // 01100100-00011010-11011111-11111111
+// CHECK-INST: fcvtx   z31.s, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xdf,0x1a,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 641adfff <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvtzs_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/fcvtzs_z-diagnostics.s
new file mode 100644
index 000000000000000..1408cba4070bf4b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/fcvtzs_z-diagnostics.s
@@ -0,0 +1,34 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+fcvtzs    z0.h, p0/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtzs    z0.h, p0/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtzs    z0.h, p0/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtzs    z0.h, p0/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+fcvtzs    z0.h, p8/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: fcvtzs    z0.h, p8/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fcvtzs z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvtzs z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcvtzs z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvtzs z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvtzs_z.s b/llvm/test/MC/AArch64/SVE2p2/fcvtzs_z.s
new file mode 100644
index 000000000000000..a37f83c0f97b7ca
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/fcvtzs_z.s
@@ -0,0 +1,63 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// convert from half
+
+fcvtzs  z0.h, p0/z, z0.h  // 01100100-01011110-11000000-00000000
+// CHECK-INST: fcvtzs  z0.h, p0/z, z0.h
+// CHECK-ENCODING: [0x00,0xc0,0x5e,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 645ec000 <unknown>
+
+fcvtzs  z23.s, p3/z, z13.h  // 01100100-01011111-10001101-10110111
+// CHECK-INST: fcvtzs  z23.s, p3/z, z13.h
+// CHECK-ENCODING: [0xb7,0x8d,0x5f,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 645f8db7 <unknown>
+
+fcvtzs  z31.d, p7/z, z31.h  // 01100100-01011111-11011111-11111111
+// CHECK-INST: fcvtzs  z31.d, p7/z, z31.h
+// CHECK-ENCODING: [0xff,0xdf,0x5f,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 645fdfff <unknown>
+
+// convert from single
+
+fcvtzs  z0.s, p0/z, z0.s  // 01100100-10011111-10000000-00000000
+// CHECK-INST: fcvtzs  z0.s, p0/z, z0.s
+// CHECK-ENCODING: [0x00,0x80,0x9f,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 649f8000 <unknown>
+
+fcvtzs  z21.d, p5/z, z10.s  // 01100100-11011111-10010101-01010101
+// CHECK-INST: fcvtzs  z21.d, p5/z, z10.s
+// CHECK-ENCODING: [0x55,0x95,0xdf,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64df9555 <unknown>
+
+// convert from double
+
+fcvtzs  z23.s, p3/z, z13.d  // 01100100-11011110-10001101-10110111
+// CHECK-INST: fcvtzs  z23.s, p3/z, z13.d
+// CHECK-ENCODING: [0xb7,0x8d,0xde,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64de8db7 <unknown>
+
+fcvtzs  z31.d, p7/z, z31.d  // 01100100-11011111-11011111-11111111
+// CHECK-INST: fcvtzs  z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xdf,0xdf,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64dfdfff <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvtzu_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/fcvtzu_z-diagnostics.s
new file mode 100644
index 000000000000000..fc4ecda82bd2007
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/fcvtzu_z-diagnostics.s
@@ -0,0 +1,34 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+fcvtzu    z0.h, p0/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtzu    z0.h, p0/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fcvtzu    z0.h, p0/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fcvtzu    z0.h, p0/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+fcvtzu    z0.h, p8/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: fcvtzu    z0.h, p8/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+fcvtzu z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvtzu z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fcvtzu z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fcvtzu z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/fcvtzu_z.s b/llvm/test/MC/AArch64/SVE2p2/fcvtzu_z.s
new file mode 100644
index 000000000000000..df1ac4016689b78
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/fcvtzu_z.s
@@ -0,0 +1,63 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// convert from half
+
+fcvtzu  z0.h, p0/z, z0.h  // 01100100-01011110-11100000-00000000
+// CHECK-INST: fcvtzu  z0.h, p0/z, z0.h
+// CHECK-ENCODING: [0x00,0xe0,0x5e,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 645ee000 <unknown>
+
+fcvtzu  z21.s, p5/z, z10.h  // 01100100-01011111-10110101-01010101
+// CHECK-INST: fcvtzu  z21.s, p5/z, z10.h
+// CHECK-ENCODING: [0x55,0xb5,0x5f,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 645fb555 <unknown>
+
+fcvtzu  z23.d, p3/z, z13.h  // 01100100-01011111-11101101-10110111
+// CHECK-INST: fcvtzu  z23.d, p3/z, z13.h
+// CHECK-ENCODING: [0xb7,0xed,0x5f,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 645fedb7 <unknown>
+
+// convert from single
+
+fcvtzu  z21.s, p5/z, z10.s  // 01100100-10011111-10110101-01010101
+// CHECK-INST: fcvtzu  z21.s, p5/z, z10.s
+// CHECK-ENCODING: [0x55,0xb5,0x9f,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 649fb555 <unknown>
+
+fcvtzu  z31.d, p7/z, z31.s  // 01100100-11011111-10111111-11111111
+// CHECK-INST: fcvtzu  z31.d, p7/z, z31.s
+// CHECK-ENCODING: [0xff,0xbf,0xdf,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64dfbfff <unknown>
+
+// convert from double
+
+fcvtzu  z0.s, p0/z, z0.d  // 01100100-11011110-10100000-00000000
+// CHECK-INST: fcvtzu  z0.s, p0/z, z0.d
+// CHECK-ENCODING: [0x00,0xa0,0xde,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64dea000 <unknown>
+
+fcvtzu  z31.d, p7/z, z31.d  // 01100100-11011111-11111111-11111111
+// CHECK-INST: fcvtzu  z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xff,0xdf,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64dfffff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/firstp-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/firstp-diagnostics.s
new file mode 100644
index 000000000000000..4309fd49ecf79f1
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/firstp-diagnostics.s
@@ -0,0 +1,32 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate operand
+
+firstp  x0, p15, p0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: firstp  x0, p15, p0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+firstp  x0, p15.b, p0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: firstp  x0, p15.b, p0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+firstp  x0, p15.q, p0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: firstp  x0, p15.q, p0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid register types
+
+firstp  sp, p15, p0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: firstp  sp, p15, p0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+firstp  w0, p15, p0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: firstp  w0, p15, p0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2p2/firstp.s b/llvm/test/MC/AArch64/SVE2p2/firstp.s
new file mode 100644
index 000000000000000..629bee5576fc7d7
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/firstp.s
@@ -0,0 +1,87 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+firstp  x0, p0, p0.b  // 00100101-00100001-10000000-00000000
+// CHECK-INST: firstp  x0, p0, p0.b
+// CHECK-ENCODING: [0x00,0x80,0x21,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25218000 <unknown>
+
+firstp  x23, p11, p13.b  // 00100101-00100001-10101101-10110111
+// CHECK-INST: firstp  x23, p11, p13.b
+// CHECK-ENCODING: [0xb7,0xad,0x21,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 2521adb7 <unknown>
+
+firstp  xzr, p15, p15.b  // 00100101-00100001-10111101-11111111
+// CHECK-INST: firstp  xzr, p15, p15.b
+// CHECK-ENCODING: [0xff,0xbd,0x21,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 2521bdff <unknown>
+
+firstp  x0, p0, p0.h  // 00100101-01100001-10000000-00000000
+// CHECK-INST: firstp  x0, p0, p0.h
+// CHECK-ENCODING: [0x00,0x80,0x61,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25618000 <unknown>
+
+firstp  x23, p11, p13.h  // 00100101-01100001-10101101-10110111
+// CHECK-INST: firstp  x23, p11, p13.h
+// CHECK-ENCODING: [0xb7,0xad,0x61,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 2561adb7 <unknown>
+
+firstp  xzr, p15, p15.h  // 00100101-01100001-10111101-11111111
+// CHECK-INST: firstp  xzr, p15, p15.h
+// CHECK-ENCODING: [0xff,0xbd,0x61,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 2561bdff <unknown>
+
+firstp  x0, p0, p0.s  // 00100101-10100001-10000000-00000000
+// CHECK-INST: firstp  x0, p0, p0.s
+// CHECK-ENCODING: [0x00,0x80,0xa1,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25a18000 <unknown>
+
+firstp  x23, p11, p13.s  // 00100101-10100001-10101101-10110111
+// CHECK-INST: firstp  x23, p11, p13.s
+// CHECK-ENCODING: [0xb7,0xad,0xa1,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25a1adb7 <unknown>
+
+firstp  xzr, p15, p15.s  // 00100101-10100001-10111101-11111111
+// CHECK-INST: firstp  xzr, p15, p15.s
+// CHECK-ENCODING: [0xff,0xbd,0xa1,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25a1bdff <unknown>
+
+firstp  x0, p0, p0.d  // 00100101-11100001-10000000-00000000
+// CHECK-INST: firstp  x0, p0, p0.d
+// CHECK-ENCODING: [0x00,0x80,0xe1,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25e18000 <unknown>
+
+firstp  x23, p11, p13.d  // 00100101-11100001-10101101-10110111
+// CHECK-INST: firstp  x23, p11, p13.d
+// CHECK-ENCODING: [0xb7,0xad,0xe1,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25e1adb7 <unknown>
+
+firstp  xzr, p15, p15.d  // 00100101-11100001-10111101-11111111
+// CHECK-INST: firstp  xzr, p15, p15.d
+// CHECK-ENCODING: [0xff,0xbd,0xe1,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25e1bdff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/flogb_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/flogb_z-diagnostics.s
new file mode 100644
index 000000000000000..8fd528e1fc05d4a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/flogb_z-diagnostics.s
@@ -0,0 +1,47 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2  2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+flogb   z0.b, p0/z, z0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: flogb   z0.b, p0/z, z0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+flogb   z0.h, p0/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: flogb   z0.h, p0/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+flogb   z0.s, p0/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: flogb   z0.s, p0/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+flogb   z0.d, p0/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: flogb   z0.d, p0/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+flogb    z0.h, p8/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: flogb    z0.h, p8/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+flogb z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: flogb z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+flogb z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: flogb z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/flogb_z.s b/llvm/test/MC/AArch64/SVE2p2/flogb_z.s
new file mode 100644
index 000000000000000..1b056aa928ce234
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/flogb_z.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+flogb   z0.h, p0/z, z0.h  // 01100100-00011110-10100000-00000000
+// CHECK-INST: flogb   z0.h, p0/z, z0.h
+// CHECK-ENCODING: [0x00,0xa0,0x1e,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 641ea000 <unknown>
+
+flogb   z23.s, p3/z, z13.s  // 01100100-00011110-11001101-10110111
+// CHECK-INST: flogb   z23.s, p3/z, z13.s
+// CHECK-ENCODING: [0xb7,0xcd,0x1e,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 641ecdb7 <unknown>
+
+flogb   z31.d, p7/z, z31.d  // 01100100-00011110-11111111-11111111
+// CHECK-INST: flogb   z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xff,0x1e,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 641effff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/lastp-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/lastp-diagnostics.s
new file mode 100644
index 000000000000000..e277bdbc6aa8b35
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/lastp-diagnostics.s
@@ -0,0 +1,32 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate operand
+
+lastp  x0, p15, p0
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: lastp  x0, p15, p0
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+lastp  x0, p15.b, p0.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: lastp  x0, p15.b, p0.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+lastp  x0, p15.q, p0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register.
+// CHECK-NEXT: lastp  x0, p15.q, p0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid register types
+
+lastp  sp, p15, p0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: lastp  sp, p15, p0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+lastp  w0, p15, p0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// CHECK-NEXT: lastp  w0, p15, p0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/lastp.s b/llvm/test/MC/AArch64/SVE2p2/lastp.s
new file mode 100644
index 000000000000000..1ffa0a7d1fcc198
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/lastp.s
@@ -0,0 +1,87 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+lastp   x0, p0, p0.b  // 00100101-00100010-10000000-00000000
+// CHECK-INST: lastp   x0, p0, p0.b
+// CHECK-ENCODING: [0x00,0x80,0x22,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25228000 <unknown>
+
+lastp   x23, p11, p13.b  // 00100101-00100010-10101101-10110111
+// CHECK-INST: lastp   x23, p11, p13.b
+// CHECK-ENCODING: [0xb7,0xad,0x22,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 2522adb7 <unknown>
+
+lastp   xzr, p15, p15.b  // 00100101-00100010-10111101-11111111
+// CHECK-INST: lastp   xzr, p15, p15.b
+// CHECK-ENCODING: [0xff,0xbd,0x22,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 2522bdff <unknown>
+
+lastp   x0, p0, p0.h  // 00100101-01100010-10000000-00000000
+// CHECK-INST: lastp   x0, p0, p0.h
+// CHECK-ENCODING: [0x00,0x80,0x62,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25628000 <unknown>
+
+lastp   x23, p11, p13.h  // 00100101-01100010-10101101-10110111
+// CHECK-INST: lastp   x23, p11, p13.h
+// CHECK-ENCODING: [0xb7,0xad,0x62,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 2562adb7 <unknown>
+
+lastp   xzr, p15, p15.h  // 00100101-01100010-10111101-11111111
+// CHECK-INST: lastp   xzr, p15, p15.h
+// CHECK-ENCODING: [0xff,0xbd,0x62,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 2562bdff <unknown>
+
+lastp   x0, p0, p0.s  // 00100101-10100010-10000000-00000000
+// CHECK-INST: lastp   x0, p0, p0.s
+// CHECK-ENCODING: [0x00,0x80,0xa2,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25a28000 <unknown>
+
+lastp   x23, p11, p13.s  // 00100101-10100010-10101101-10110111
+// CHECK-INST: lastp   x23, p11, p13.s
+// CHECK-ENCODING: [0xb7,0xad,0xa2,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25a2adb7 <unknown>
+
+lastp   xzr, p15, p15.s  // 00100101-10100010-10111101-11111111
+// CHECK-INST: lastp   xzr, p15, p15.s
+// CHECK-ENCODING: [0xff,0xbd,0xa2,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25a2bdff <unknown>
+
+lastp   x0, p0, p0.d  // 00100101-11100010-10000000-00000000
+// CHECK-INST: lastp   x0, p0, p0.d
+// CHECK-ENCODING: [0x00,0x80,0xe2,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25e28000 <unknown>
+
+lastp   x23, p11, p13.d  // 00100101-11100010-10101101-10110111
+// CHECK-INST: lastp   x23, p11, p13.d
+// CHECK-ENCODING: [0xb7,0xad,0xe2,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25e2adb7 <unknown>
+
+lastp   xzr, p15, p15.d  // 00100101-11100010-10111101-11111111
+// CHECK-INST: lastp   xzr, p15, p15.d
+// CHECK-ENCODING: [0xff,0xbd,0xe2,0x25]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 25e2bdff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/scvtf_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/scvtf_z-diagnostics.s
new file mode 100644
index 000000000000000..9dd089271462596
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/scvtf_z-diagnostics.s
@@ -0,0 +1,34 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+scvtf    z0.s, p0/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: scvtf    z0.s, p0/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+scvtf    z0.d, p0/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: scvtf    z0.d, p0/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+scvtf    z0.h, p8/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: scvtf    z0.h, p8/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+scvtf z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: scvtf z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+scvtf z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: scvtf z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2p2/scvtf_z.s b/llvm/test/MC/AArch64/SVE2p2/scvtf_z.s
new file mode 100644
index 000000000000000..b8898c6485f6199
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/scvtf_z.s
@@ -0,0 +1,63 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// convert to half
+
+scvtf   z0.h, p0/z, z0.h  // 01100100-01011100-11000000-00000000
+// CHECK-INST: scvtf   z0.h, p0/z, z0.h
+// CHECK-ENCODING: [0x00,0xc0,0x5c,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 645cc000 <unknown>
+
+scvtf   z21.h, p5/z, z10.s  // 01100100-01011101-10010101-01010101
+// CHECK-INST: scvtf   z21.h, p5/z, z10.s
+// CHECK-ENCODING: [0x55,0x95,0x5d,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 645d9555 <unknown>
+
+scvtf   z31.h, p7/z, z31.d  // 01100100-01011101-11011111-11111111
+// CHECK-INST: scvtf   z31.h, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xdf,0x5d,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 645ddfff <unknown>
+
+// convert to single
+
+scvtf   z0.s, p0/z, z0.s  // 01100100-10011101-10000000-00000000
+// CHECK-INST: scvtf   z0.s, p0/z, z0.s
+// CHECK-ENCODING: [0x00,0x80,0x9d,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 649d8000 <unknown>
+
+scvtf   z23.s, p3/z, z13.d  // 01100100-11011101-10001101-10110111
+// CHECK-INST: scvtf   z23.s, p3/z, z13.d
+// CHECK-ENCODING: [0xb7,0x8d,0xdd,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64dd8db7 <unknown>
+
+// convert to double
+
+scvtf   z21.d, p5/z, z10.s  // 01100100-11011100-10010101-01010101
+// CHECK-INST: scvtf   z21.d, p5/z, z10.s
+// CHECK-ENCODING: [0x55,0x95,0xdc,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64dc9555 <unknown>
+
+scvtf   z31.d, p7/z, z31.d  // 01100100-11011101-11011111-11111111
+// CHECK-INST: scvtf   z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xdf,0xdd,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64dddfff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/sqabs_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/sqabs_z-diagnostics.s
new file mode 100644
index 000000000000000..8c00a74c69c8c63
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/sqabs_z-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+sqabs     z31.b, p7/z, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqabs     z31.b, p7/z, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqabs     z31.d, p7/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqabs     z31.d, p7/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+sqabs     z31.b, p8/z, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: sqabs     z31.b, p8/z, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.h, p0/z, z7.h
+sqabs z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqabs z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+sqabs z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqabs z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/sqabs_z.s b/llvm/test/MC/AArch64/SVE2p2/sqabs_z.s
new file mode 100644
index 000000000000000..c583423fac6eba9
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/sqabs_z.s
@@ -0,0 +1,39 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+sqabs   z0.b, p0/z, z0.b  // 01000100-00001010-10100000-00000000
+// CHECK-INST: sqabs   z0.b, p0/z, z0.b
+// CHECK-ENCODING: [0x00,0xa0,0x0a,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 440aa000 <unknown>
+
+sqabs   z21.h, p5/z, z10.h  // 01000100-01001010-10110101-01010101
+// CHECK-INST: sqabs   z21.h, p5/z, z10.h
+// CHECK-ENCODING: [0x55,0xb5,0x4a,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 444ab555 <unknown>
+
+sqabs   z23.s, p3/z, z13.s  // 01000100-10001010-10101101-10110111
+// CHECK-INST: sqabs   z23.s, p3/z, z13.s
+// CHECK-ENCODING: [0xb7,0xad,0x8a,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 448aadb7 <unknown>
+
+sqabs   z31.d, p7/z, z31.d  // 01000100-11001010-10111111-11111111
+// CHECK-INST: sqabs   z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xca,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 44cabfff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/sqneg_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/sqneg_z-diagnostics.s
new file mode 100644
index 000000000000000..576633c0ab1b64a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/sqneg_z-diagnostics.s
@@ -0,0 +1,37 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+
+sqneg     z31.b, p7/z, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqneg     z31.b, p7/z, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sqneg     z31.d, p7/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sqneg     z31.d, p7/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+sqneg     z31.b, p8/z, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: sqneg     z31.b, p8/z, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.h, p0/z, z7.h
+sqneg z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqneg z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+sqneg z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sqneg z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/sqneg_z.s b/llvm/test/MC/AArch64/SVE2p2/sqneg_z.s
new file mode 100644
index 000000000000000..287211fd3ff5dd9
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/sqneg_z.s
@@ -0,0 +1,39 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+sqneg   z0.b, p0/z, z0.b  // 01000100-00001011-10100000-00000000
+// CHECK-INST: sqneg   z0.b, p0/z, z0.b
+// CHECK-ENCODING: [0x00,0xa0,0x0b,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 440ba000 <unknown>
+
+sqneg   z23.h, p3/z, z13.h  // 01000100-01001011-10101101-10110111
+// CHECK-INST: sqneg   z23.h, p3/z, z13.h
+// CHECK-ENCODING: [0xb7,0xad,0x4b,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 444badb7 <unknown>
+
+sqneg   z21.s, p5/z, z10.s  // 01000100-10001011-10110101-01010101
+// CHECK-INST: sqneg   z21.s, p5/z, z10.s
+// CHECK-ENCODING: [0x55,0xb5,0x8b,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 448bb555 <unknown>
+
+sqneg   z31.d, p7/z, z31.d  // 01000100-11001011-10111111-11111111
+// CHECK-INST: sqneg   z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xcb,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 44cbbfff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/ucvtf_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/ucvtf_z-diagnostics.s
new file mode 100644
index 000000000000000..1317428dafde03b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/ucvtf_z-diagnostics.s
@@ -0,0 +1,34 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+ucvtf    z0.s, p0/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ucvtf    z0.s, p0/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ucvtf    z0.d, p0/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ucvtf    z0.d, p0/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+ucvtf    z0.h, p8/z, z0.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: ucvtf    z0.h, p8/z, z0.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+ucvtf z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ucvtf z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ucvtf z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ucvtf z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2p2/ucvtf_z.s b/llvm/test/MC/AArch64/SVE2p2/ucvtf_z.s
new file mode 100644
index 000000000000000..9e87afd60b4055b
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/ucvtf_z.s
@@ -0,0 +1,63 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// convert to half
+
+ucvtf   z0.h, p0/z, z0.h  // 01100100-01011100-11100000-00000000
+// CHECK-INST: ucvtf   z0.h, p0/z, z0.h
+// CHECK-ENCODING: [0x00,0xe0,0x5c,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 645ce000 <unknown>
+
+ucvtf   z21.h, p5/z, z10.s  // 01100100-01011101-10110101-01010101
+// CHECK-INST: ucvtf   z21.h, p5/z, z10.s
+// CHECK-ENCODING: [0x55,0xb5,0x5d,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 645db555 <unknown>
+
+ucvtf   z31.h, p7/z, z31.d  // 01100100-01011101-11111111-11111111
+// CHECK-INST: ucvtf   z31.h, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xff,0x5d,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 645dffff <unknown>
+
+// convert to single
+
+ucvtf   z23.s, p3/z, z13.s  // 01100100-10011101-10101101-10110111
+// CHECK-INST: ucvtf   z23.s, p3/z, z13.s
+// CHECK-ENCODING: [0xb7,0xad,0x9d,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 649dadb7 <unknown>
+
+ucvtf   z21.s, p5/z, z10.d  // 01100100-11011101-10110101-01010101
+// CHECK-INST: ucvtf   z21.s, p5/z, z10.d
+// CHECK-ENCODING: [0x55,0xb5,0xdd,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64ddb555 <unknown>
+
+// convert to double
+
+ucvtf   z0.d, p0/z, z0.s  // 01100100-11011100-10100000-00000000
+// CHECK-INST: ucvtf   z0.d, p0/z, z0.s
+// CHECK-ENCODING: [0x00,0xa0,0xdc,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64dca000 <unknown>
+
+ucvtf   z31.d, p7/z, z31.d  // 01100100-11011101-11111111-11111111
+// CHECK-INST: ucvtf   z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xff,0xdd,0x64]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 64ddffff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z-diagnostics.s
new file mode 100644
index 000000000000000..b44039fae464a97
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z-diagnostics.s
@@ -0,0 +1,249 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+// ------------------------------------------------------------------------- //
+// Invalid element width
+abs     z31.b, p7/z, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: abs     z31.b, p7/z, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+cls     z31.d, p7/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: cls     z31.d, p7/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+clz     z31.d, p7/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: clz     z31.d, p7/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+cnot     z31.b, p7/z, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: cnot     z31.b, p7/z, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+cnt     z31.d, p7/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: cnt     z31.d, p7/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fabs     z31.h, p7/z, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fabs     z31.h, p7/z, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fneg     z31.d, p7/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: fneg     z31.d, p7/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+neg     z31.s, p7/z, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: neg     z31.s, p7/z, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+not     z31.b, p7/z, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: not     z31.b, p7/z, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sxtb     z31.h, p7/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sxtb     z31.h, p7/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sxth     z31.s, p7/z, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sxth     z31.s, p7/z, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sxtw     z31.d, p7/z, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: sxtw     z31.d, p7/z, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uxtb     z31.s, p7/z, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uxtb     z31.s, p7/z, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uxth     z31.d, p7/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uxth     z31.d, p7/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uxtw     z31.d, p7/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: uxtw     z31.d, p7/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+abs     z31.s, p8/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: abs     z31.s, p8/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+cls     z31.b, p8/z, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: cls     z31.b, p8/z, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+clz     z31.b, p8/z, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: clz     z31.b, p8/z, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+cnot     z31.b, p8/z, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: cnot     z31.b, p8/z, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+cnt     z31.b, p8/z, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: cnt     z31.b, p8/z, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fabs    z31.h, p8/z, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: fabs    z31.h, p8/z, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+fneg    z31.h, p8/z, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: fneg    z31.h, p8/z, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+neg     z31.s, p8/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: neg     z31.s, p8/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+not     z31.b, p8/z, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: not     z31.b, p8/z, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sxtb     z31.s, p8/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: sxtb     z31.s, p8/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sxth z0.s, p8/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: sxth z0.s, p8/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+sxtw z0.d, p8/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: sxtw z0.d, p8/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uxtb     z31.s, p8/z, z31.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: uxtb     z31.s, p8/z, z31.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uxth z0.s, p8/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: uxth z0.s, p8/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+uxtw z0.d, p8/z, z0.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: uxtw z0.d, p8/z, z0.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.d, p0/z, z7.d
+abs z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: abs z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cls z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cls z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+clz z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: clz z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+cnot z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cnot z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+cnt z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: cnt z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+fabs z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fabs z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+fneg z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: fneg z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+neg z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: neg z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.h, p0/z, z7.h
+not z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: not z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+sxtb z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sxtb z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+sxth z0.s, p0/z, z3.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sxth z0.s, p0/z, z3.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+sxtw z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: sxtw z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+uxtb z0.h, p0/z, z3.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uxtb z0.h, p0/z, z3.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0.s, p0/z, z7.s
+uxth z0.s, p0/z, z3.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uxth z0.s, p0/z, z3.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+uxtw z0.d, p0/z, z3.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: uxtw z0.d, p0/z, z3.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
diff --git a/llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z.s b/llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z.s
new file mode 100644
index 000000000000000..c460602f31ae644
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/unary_arithmetic_predicated_z.s
@@ -0,0 +1,225 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// ABS
+
+abs     z0.b, p0/z, z0.b  // 00000100-00000110-10100000-00000000
+// CHECK-INST: abs     z0.b, p0/z, z0.b
+// CHECK-ENCODING: [0x00,0xa0,0x06,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 0406a000 <unknown>
+
+abs     z31.d, p7/z, z31.d  // 00000100-11000110-10111111-11111111
+// CHECK-INST: abs     z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xc6,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04c6bfff <unknown>
+
+
+// CLS
+
+cls     z0.b, p0/z, z0.b  // 00000100-00001000-10100000-00000000
+// CHECK-INST: cls     z0.b, p0/z, z0.b
+// CHECK-ENCODING: [0x00,0xa0,0x08,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 0408a000 <unknown>
+
+cls     z31.d, p7/z, z31.d  // 00000100-11001000-10111111-11111111
+// CHECK-INST: cls     z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xc8,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04c8bfff <unknown>
+
+// CLZ
+
+clz     z0.b, p0/z, z0.b  // 00000100-00001001-10100000-00000000
+// CHECK-INST: clz     z0.b, p0/z, z0.b
+// CHECK-ENCODING: [0x00,0xa0,0x09,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 0409a000 <unknown>
+
+clz     z31.d, p7/z, z31.d  // 00000100-11001001-10111111-11111111
+// CHECK-INST: clz     z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xc9,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04c9bfff <unknown>
+
+// CNOT
+
+cnot    z0.b, p0/z, z0.b  // 00000100-00001011-10100000-00000000
+// CHECK-INST: cnot    z0.b, p0/z, z0.b
+// CHECK-ENCODING: [0x00,0xa0,0x0b,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 040ba000 <unknown>
+
+cnot    z31.d, p7/z, z31.d  // 00000100-11001011-10111111-11111111
+// CHECK-INST: cnot    z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xcb,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04cbbfff <unknown>
+
+// CNT
+
+cnt     z0.b, p0/z, z0.b  // 00000100-00001010-10100000-00000000
+// CHECK-INST: cnt     z0.b, p0/z, z0.b
+// CHECK-ENCODING: [0x00,0xa0,0x0a,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 040aa000 <unknown>
+
+cnt     z31.d, p7/z, z31.d  // 00000100-11001010-10111111-11111111
+// CHECK-INST: cnt     z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xca,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04cabfff <unknown>
+
+
+// FABS
+
+fabs    z0.h, p0/z, z0.h  // 00000100-01001100-10100000-00000000
+// CHECK-INST: fabs    z0.h, p0/z, z0.h
+// CHECK-ENCODING: [0x00,0xa0,0x4c,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 044ca000 <unknown>
+
+fabs    z31.d, p7/z, z31.d  // 00000100-11001100-10111111-11111111
+// CHECK-INST: fabs    z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xcc,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04ccbfff <unknown>
+
+// FNEG
+
+fneg    z0.h, p0/z, z0.h  // 00000100-01001101-10100000-00000000
+// CHECK-INST: fneg    z0.h, p0/z, z0.h
+// CHECK-ENCODING: [0x00,0xa0,0x4d,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 044da000 <unknown>
+
+fneg    z31.d, p7/z, z31.d  // 00000100-11001101-10111111-11111111
+// CHECK-INST: fneg    z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xcd,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04cdbfff <unknown>
+
+// NEG
+
+neg     z0.b, p0/z, z0.b  // 00000100-00000111-10100000-00000000
+// CHECK-INST: neg     z0.b, p0/z, z0.b
+// CHECK-ENCODING: [0x00,0xa0,0x07,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 0407a000 <unknown>
+
+neg     z31.d, p7/z, z31.d  // 00000100-11000111-10111111-11111111
+// CHECK-INST: neg     z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xc7,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04c7bfff <unknown>
+
+// NOT
+
+not     z0.b, p0/z, z0.b  // 00000100-00001110-10100000-00000000
+// CHECK-INST: not     z0.b, p0/z, z0.b
+// CHECK-ENCODING: [0x00,0xa0,0x0e,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 040ea000 <unknown>
+
+not     z31.d, p7/z, z31.d  // 00000100-11001110-10111111-11111111
+// CHECK-INST: not     z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xce,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04cebfff <unknown>
+
+// SXTB
+
+sxtb    z0.h, p0/z, z0.h  // 00000100-01000000-10100000-00000000
+// CHECK-INST: sxtb    z0.h, p0/z, z0.h
+// CHECK-ENCODING: [0x00,0xa0,0x40,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 0440a000 <unknown>
+
+sxtb    z31.d, p7/z, z31.d  // 00000100-11000000-10111111-11111111
+// CHECK-INST: sxtb    z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xc0,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04c0bfff <unknown>
+
+// SXTH
+
+sxth    z0.s, p0/z, z0.s  // 00000100-10000010-10100000-00000000
+// CHECK-INST: sxth    z0.s, p0/z, z0.s
+// CHECK-ENCODING: [0x00,0xa0,0x82,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 0482a000 <unknown>
+
+sxth    z31.d, p7/z, z31.d  // 00000100-11000010-10111111-11111111
+// CHECK-INST: sxth    z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xc2,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04c2bfff <unknown>
+
+// SXTW
+
+sxtw    z0.d, p0/z, z0.d  // 00000100-11000100-10100000-00000000
+// CHECK-INST: sxtw    z0.d, p0/z, z0.d
+// CHECK-ENCODING: [0x00,0xa0,0xc4,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04c4a000 <unknown>
+
+sxtw    z31.d, p7/z, z31.d  // 00000100-11000100-10111111-11111111
+// CHECK-INST: sxtw    z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xc4,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04c4bfff <unknown>
+
+// UXTB
+
+uxtb    z0.h, p0/z, z0.h  // 00000100-01000001-10100000-00000000
+// CHECK-INST: uxtb    z0.h, p0/z, z0.h
+// CHECK-ENCODING: [0x00,0xa0,0x41,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 0441a000 <unknown>
+
+uxtb    z31.d, p7/z, z31.d  // 00000100-11000001-10111111-11111111
+// CHECK-INST: uxtb    z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xc1,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04c1bfff <unknown>
+
+uxth    z0.s, p0/z, z0.s  // 00000100-10000011-10100000-00000000
+// CHECK-INST: uxth    z0.s, p0/z, z0.s
+// CHECK-ENCODING: [0x00,0xa0,0x83,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 0483a000 <unknown>
+
+uxth    z31.d, p7/z, z31.d  // 00000100-11000011-10111111-11111111
+// CHECK-INST: uxth    z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xc3,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04c3bfff <unknown>
+
+// UXTW
+
+uxtw    z0.d, p0/z, z0.d  // 00000100-11000101-10100000-00000000
+// CHECK-INST: uxtw    z0.d, p0/z, z0.d
+// CHECK-ENCODING: [0x00,0xa0,0xc5,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04c5a000 <unknown>
+
+uxtw    z31.d, p7/z, z31.d  // 00000100-11000101-10111111-11111111
+// CHECK-INST: uxtw    z31.d, p7/z, z31.d
+// CHECK-ENCODING: [0xff,0xbf,0xc5,0x04]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 04c5bfff <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2p2/urecpe_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/urecpe_z-diagnostics.s
new file mode 100644
index 000000000000000..9da2a7e096f3af4
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/urecpe_z-diagnostics.s
@@ -0,0 +1,47 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+urecpe     z31.b, p7/z, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: urecpe     z31.b, p7/z, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urecpe     z31.h, p7/z, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: urecpe     z31.h, p7/z, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urecpe     z31.s, p7/z, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: urecpe     z31.s, p7/z, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+urecpe     z31.d, p7/z, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: urecpe     z31.d, p7/z, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+urecpe z0.s, p8/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: urecpe z0.s, p8/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/z, z7.s
+urecpe z0.s, p0/z, z3.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: urecpe z0.s, p0/z, z3.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+urecpe z0.s, p0/z, z3.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: urecpe z0.s, p0/z, z3.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/urecpe_z.s b/llvm/test/MC/AArch64/SVE2p2/urecpe_z.s
new file mode 100644
index 000000000000000..6b3ec79c988690a
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/urecpe_z.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+urecpe  z0.s, p0/z, z0.s  // 01000100-10000010-10100000-00000000
+// CHECK-INST: urecpe  z0.s, p0/z, z0.s
+// CHECK-ENCODING: [0x00,0xa0,0x82,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 4482a000 <unknown>
+
+urecpe  z23.s, p3/z, z13.s  // 01000100-10000010-10101101-10110111
+// CHECK-INST: urecpe  z23.s, p3/z, z13.s
+// CHECK-ENCODING: [0xb7,0xad,0x82,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 4482adb7 <unknown>
+
+urecpe  z31.s, p7/z, z31.s  // 01000100-10000010-10111111-11111111
+// CHECK-INST: urecpe  z31.s, p7/z, z31.s
+// CHECK-ENCODING: [0xff,0xbf,0x82,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 4482bfff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/ursqrte_z-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/ursqrte_z-diagnostics.s
new file mode 100644
index 000000000000000..1b6cf8d78e655ef
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/ursqrte_z-diagnostics.s
@@ -0,0 +1,47 @@
+// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s
+
+// --------------------------------------------------------------------------//
+// Invalid element width
+
+ursqrte     z31.b, p7/z, z31.b
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ursqrte     z31.b, p7/z, z31.b
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ursqrte     z31.h, p7/z, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ursqrte     z31.h, p7/z, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ursqrte     z31.s, p7/z, z31.h
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ursqrte     z31.s, p7/z, z31.h
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+ursqrte     z31.d, p7/z, z31.d
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width
+// CHECK-NEXT: ursqrte     z31.d, p7/z, z31.d
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// ------------------------------------------------------------------------- //
+// Invalid predicate
+
+ursqrte z0.s, p8/z, z0.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix)
+// CHECK-NEXT: ursqrte z0.s, p8/z, z0.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
+movprfx z0.s, p0/z, z7.s
+ursqrte z0.s, p0/z, z3.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ursqrte z0.s, p0/z, z3.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+ursqrte z0.s, p0/z, z3.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: ursqrte z0.s, p0/z, z3.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE2p2/ursqrte_z.s b/llvm/test/MC/AArch64/SVE2p2/ursqrte_z.s
new file mode 100644
index 000000000000000..097d82bebd430b4
--- /dev/null
+++ b/llvm/test/MC/AArch64/SVE2p2/ursqrte_z.s
@@ -0,0 +1,33 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \
+// RUN:        | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+ursqrte z0.s, p0/z, z0.s  // 01000100-10000011-10100000-00000000
+// CHECK-INST: ursqrte z0.s, p0/z, z0.s
+// CHECK-ENCODING: [0x00,0xa0,0x83,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 4483a000 <unknown>
+
+ursqrte z21.s, p5/z, z10.s  // 01000100-10000011-10110101-01010101
+// CHECK-INST: ursqrte z21.s, p5/z, z10.s
+// CHECK-ENCODING: [0x55,0xb5,0x83,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 4483b555 <unknown>
+
+ursqrte z31.s, p7/z, z31.s  // 01000100-10000011-10111111-11111111
+// CHECK-INST: ursqrte z31.s, p7/z, z31.s
+// CHECK-ENCODING: [0xff,0xbf,0x83,0x44]
+// CHECK-ERROR: instruction requires: sme2p2 or sve2p2
+// CHECK-UNKNOWN: 4483bfff <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/armv9.6a-cvtf.s b/llvm/test/MC/AArch64/armv9.6a-cvtf.s
new file mode 100644
index 000000000000000..6858d3896af5aba
--- /dev/null
+++ b/llvm/test/MC/AArch64/armv9.6a-cvtf.s
@@ -0,0 +1,61 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fprcvt < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fprcvt < %s \
+// RUN:        | llvm-objdump -d --mattr=+fprcvt - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fprcvt < %s \
+// RUN:        | llvm-objdump -d  --no-print-imm-hex --mattr=-fprcvt - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fprcvt < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+fprcvt -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+scvtf d1, s2
+// CHECK-INST: scvtf d1, s2
+// CHECK-ENCODING: [0x41,0x00,0x7c,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1e7c0041 <unknown>
+
+scvtf h1, s2
+// CHECK-INST: scvtf h1, s2
+// CHECK-ENCODING: [0x41,0x00,0xfc,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1efc0041 <unknown>
+
+scvtf h2, d0
+// CHECK-INST: scvtf h2, d0
+// CHECK-ENCODING: [0x02,0x00,0xfc,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9efc0002 <unknown>
+
+scvtf s3, d4
+// CHECK-INST: scvtf s3, d4
+// CHECK-ENCODING: [0x83,0x00,0x3c,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9e3c0083 <unknown>
+
+ucvtf d1, s2
+// CHECK-INST: ucvtf d1, s2
+// CHECK-ENCODING: [0x41,0x00,0x7d,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1e7d0041 <unknown>
+
+ucvtf h1, s2
+// CHECK-INST: ucvtf h1, s2
+// CHECK-ENCODING: [0x41,0x00,0xfd,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1efd0041 <unknown>
+
+ucvtf h2, d0
+// CHECK-INST: ucvtf h2, d0
+// CHECK-ENCODING: [0x02,0x00,0xfd,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9efd0002 <unknown>
+
+ucvtf s3, d4
+// CHECK-INST: ucvtf s3, d4
+// CHECK-ENCODING: [0x83,0x00,0x3d,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9e3d0083 <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/armv9.6a-fcvt.s b/llvm/test/MC/AArch64/armv9.6a-fcvt.s
new file mode 100644
index 000000000000000..b14ec93563f5c4d
--- /dev/null
+++ b/llvm/test/MC/AArch64/armv9.6a-fcvt.s
@@ -0,0 +1,253 @@
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fprcvt < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
+// RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fprcvt < %s \
+// RUN:        | llvm-objdump -d --mattr=+fprcvt - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+fprcvt < %s \
+// RUN:        | llvm-objdump -d  --no-print-imm-hex --mattr=-fprcvt - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// Disassemble encoding and check the re-encoding (-show-encoding) matches.
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+fprcvt < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+fprcvt -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+fcvtas s0, d1
+// CHECK-INST: fcvtas s0, d1
+// CHECK-ENCODING: [0x20,0x00,0x7a,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1e7a0020 <unknown>
+
+fcvtas s1, h2
+// CHECK-INST: fcvtas s1, h2
+// CHECK-ENCODING: [0x41,0x00,0xfa,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1efa0041 <unknown>
+
+fcvtas d3, h4
+// CHECK-INST: fcvtas d3, h4
+// CHECK-ENCODING: [0x83,0x00,0xfa,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9efa0083 <unknown>
+
+fcvtas d0, s5
+// CHECK-INST: fcvtas d0, s5
+// CHECK-ENCODING: [0xa0,0x00,0x3a,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9e3a00a0 <unknown>
+
+fcvtau s0, d1
+// CHECK-INST: fcvtau s0, d1
+// CHECK-ENCODING: [0x20,0x00,0x7b,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1e7b0020 <unknown>
+
+fcvtau s1, h2
+// CHECK-INST: fcvtau s1, h2
+// CHECK-ENCODING: [0x41,0x00,0xfb,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1efb0041 <unknown>
+
+fcvtau d3, h4
+// CHECK-INST: fcvtau d3, h4
+// CHECK-ENCODING: [0x83,0x00,0xfb,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9efb0083 <unknown>
+
+fcvtau d0, s5
+// CHECK-INST: fcvtau d0, s5
+// CHECK-ENCODING: [0xa0,0x00,0x3b,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9e3b00a0 <unknown>
+
+fcvtms s0, d1
+// CHECK-INST: fcvtms s0, d1
+// CHECK-ENCODING: [0x20,0x00,0x74,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1e740020 <unknown>
+
+fcvtms s1, h2
+// CHECK-INST: fcvtms s1, h2
+// CHECK-ENCODING: [0x41,0x00,0xf4,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1ef40041 <unknown>
+
+fcvtms d3, h4
+// CHECK-INST: fcvtms d3, h4
+// CHECK-ENCODING: [0x83,0x00,0xf4,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9ef40083 <unknown>
+
+fcvtms d0, s5
+// CHECK-INST: fcvtms d0, s5
+// CHECK-ENCODING: [0xa0,0x00,0x34,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9e3400a0 <unknown>
+
+fcvtmu s0, d1
+// CHECK-INST: fcvtmu s0, d1
+// CHECK-ENCODING: [0x20,0x00,0x75,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1e750020 <unknown>
+
+fcvtmu s1, h2
+// CHECK-INST: fcvtmu s1, h2
+// CHECK-ENCODING: [0x41,0x00,0xf5,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1ef50041 <unknown>
+
+fcvtmu d3, h4
+// CHECK-INST: fcvtmu d3, h4
+// CHECK-ENCODING: [0x83,0x00,0xf5,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9ef50083 <unknown>
+
+fcvtmu d0, s5
+// CHECK-INST: fcvtmu d0, s5
+// CHECK-ENCODING: [0xa0,0x00,0x35,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9e3500a0 <unknown>
+
+fcvtns s0, d1
+// CHECK-INST: fcvtns s0, d1
+// CHECK-ENCODING: [0x20,0x00,0x6a,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1e6a0020 <unknown>
+
+fcvtns s1, h2
+// CHECK-INST: fcvtns s1, h2
+// CHECK-ENCODING: [0x41,0x00,0xea,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1eea0041 <unknown>
+
+fcvtns d3, h4
+// CHECK-INST: fcvtns d3, h4
+// CHECK-ENCODING: [0x83,0x00,0xea,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9eea0083 <unknown>
+
+fcvtns d0, s5
+// CHECK-INST: fcvtns d0, s5
+// CHECK-ENCODING: [0xa0,0x00,0x2a,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9e2a00a0 <unknown>
+
+fcvtnu s0, d1
+// CHECK-INST: fcvtnu s0, d1
+// CHECK-ENCODING: [0x20,0x00,0x6b,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1e6b0020 <unknown>
+
+fcvtnu s1, h2
+// CHECK-INST: fcvtnu s1, h2
+// CHECK-ENCODING: [0x41,0x00,0xeb,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1eeb0041 <unknown>
+
+fcvtnu d3, h4
+// CHECK-INST: fcvtnu d3, h4
+// CHECK-ENCODING: [0x83,0x00,0xeb,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9eeb0083 <unknown>
+
+fcvtnu d0, s5
+// CHECK-INST: fcvtnu d0, s5
+// CHECK-ENCODING: [0xa0,0x00,0x2b,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9e2b00a0 <unknown>
+
+fcvtps s0, d1
+// CHECK-INST: fcvtps s0, d1
+// CHECK-ENCODING: [0x20,0x00,0x72,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1e720020 <unknown>
+
+fcvtps s1, h2
+// CHECK-INST: fcvtps s1, h2
+// CHECK-ENCODING: [0x41,0x00,0xf2,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1ef20041 <unknown>
+
+fcvtps d3, h4
+// CHECK-INST: fcvtps d3, h4
+// CHECK-ENCODING: [0x83,0x00,0xf2,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9ef20083 <unknown>
+
+fcvtps d0, s5
+// CHECK-INST: fcvtps d0, s5
+// CHECK-ENCODING: [0xa0,0x00,0x32,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9e3200a0 <unknown>
+
+fcvtpu s0, d1
+// CHECK-INST: fcvtpu s0, d1
+// CHECK-ENCODING: [0x20,0x00,0x73,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1e730020 <unknown>
+
+fcvtpu s1, h2
+// CHECK-INST: fcvtpu s1, h2
+// CHECK-ENCODING: [0x41,0x00,0xf3,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1ef30041 <unknown>
+
+fcvtpu d3, h4
+// CHECK-INST: fcvtpu d3, h4
+// CHECK-ENCODING: [0x83,0x00,0xf3,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9ef30083 <unknown>
+
+fcvtpu d0, s5
+// CHECK-INST: fcvtpu d0, s5
+// CHECK-ENCODING: [0xa0,0x00,0x33,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9e3300a0 <unknown>
+
+fcvtzs s0, d1
+// CHECK-INST: fcvtzs s0, d1
+// CHECK-ENCODING: [0x20,0x00,0x76,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1e760020 <unknown>
+
+fcvtzs s1, h2
+// CHECK-INST: fcvtzs s1, h2
+// CHECK-ENCODING: [0x41,0x00,0xf6,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1ef60041 <unknown>
+
+fcvtzs d3, h4
+// CHECK-INST: fcvtzs d3, h4
+// CHECK-ENCODING: [0x83,0x00,0xf6,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9ef60083 <unknown>
+
+fcvtzs d0, s5
+// CHECK-INST: fcvtzs d0, s5
+// CHECK-ENCODING: [0xa0,0x00,0x36,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9e3600a0 <unknown>
+
+fcvtzu s0, d1
+// CHECK-INST: fcvtzu s0, d1
+// CHECK-ENCODING: [0x20,0x00,0x77,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1e770020 <unknown>
+
+fcvtzu s1, h2
+// CHECK-INST: fcvtzu s1, h2
+// CHECK-ENCODING: [0x41,0x00,0xf7,0x1e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 1ef70041 <unknown>
+
+fcvtzu d3, h4
+// CHECK-INST: fcvtzu d3, h4
+// CHECK-ENCODING: [0x83,0x00,0xf7,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9ef70083 <unknown>
+
+fcvtzu d0, s5
+// CHECK-INST: fcvtzu d0, s5
+// CHECK-ENCODING: [0xa0,0x00,0x37,0x9e]
+// CHECK-ERROR: instruction requires: fprcvt
+// CHECK-UNKNOWN: 9e3700a0 <unknown>
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/directive-arch-negative.s b/llvm/test/MC/AArch64/directive-arch-negative.s
index 19b48ea66bfe6ab..4c17c5609712036 100644
--- a/llvm/test/MC/AArch64/directive-arch-negative.s
+++ b/llvm/test/MC/AArch64/directive-arch-negative.s
@@ -61,3 +61,21 @@
         cbhi x5, x5, #1020
 # CHECK: error: instruction requires: cmpbr
 # CHECK-NEXT:   cbhi x5, x5, #1020
+
+	.arch armv9.6-a+nofprcvt
+        scvtf d1, s2
+
+# CHECK: error: instruction requires: fprcvt
+# CHECK-NEXT:   scvtf d1, s2
+
+	.arch armv9.6-a+nof8f16mm
+        fmmla v0.8h, v1.16b, v2.16b
+
+# CHECK: error: instruction requires: f8f16mm
+# CHECK-NEXT:   fmmla v0.8h, v1.16b, v2.16b
+
+	.arch armv9.6-a+nof8f32mm
+        fmmla v0.4s, v1.16b, v2.16b
+
+# CHECK: error: instruction requires: f8f32mm
+# CHECK-NEXT:   fmmla v0.4s, v1.16b, v2.16b
diff --git a/llvm/test/MC/AArch64/directive-arch.s b/llvm/test/MC/AArch64/directive-arch.s
index 8d9c0cef7536d36..ba605cc5d1a6984 100644
--- a/llvm/test/MC/AArch64/directive-arch.s
+++ b/llvm/test/MC/AArch64/directive-arch.s
@@ -26,3 +26,15 @@
 	cbne x5, #31, lbl
 # CHECK:        cbne x5, #31, lbl
 
+
+	.arch armv9-a+fprcvt
+	scvtf h1, s2
+# CHECK:        scvtf h1, s2
+
+	.arch armv9-a+f8f16mm
+	fmmla v0.8h, v1.16b, v2.16b
+# CHECK:        fmmla v0.8h, v1.16b, v2.16b
+
+	.arch armv9-a+f8f32mm
+	fmmla v0.4s, v1.16b, v2.16b
+# CHECK:        fmmla v0.4s, v1.16b, v2.16b
diff --git a/llvm/test/MC/AArch64/directive-arch_extension-negative.s b/llvm/test/MC/AArch64/directive-arch_extension-negative.s
index 363989d7b9b260e..63da153c1a6ffca 100644
--- a/llvm/test/MC/AArch64/directive-arch_extension-negative.s
+++ b/llvm/test/MC/AArch64/directive-arch_extension-negative.s
@@ -237,3 +237,21 @@ cbhi x5, x5, #1020
 // CHECK: [[@LINE-1]]:1: error: instruction requires: cmpbr
 // CHECK-NEXT: cbhi x5, x5, #1020
 
+
+.arch_extension fprcvt
+.arch_extension nofprcvt
+fcvtmu s0, d1
+// CHECK: [[@LINE-1]]:1: error: instruction requires: fprcvt
+// CHECK-NEXT: fcvtmu s0, d1
+
+.arch_extension f8f16mm
+.arch_extension nof8f16mm
+fmmla v2.8h, v1.16b, v0.16b
+// CHECK: [[@LINE-1]]:1: error: instruction requires: f8f16mm
+// CHECK-NEXT: fmmla v2.8h, v1.16b, v0.16b
+
+.arch_extension f8f32mm
+.arch_extension nof8f32mm
+fmmla v2.4s, v1.16b, v0.16b
+// CHECK: [[@LINE-1]]:1: error: instruction requires: f8f32mm
+// CHECK-NEXT: fmmla v2.4s, v1.16b, v0.16b
diff --git a/llvm/test/MC/AArch64/directive-arch_extension.s b/llvm/test/MC/AArch64/directive-arch_extension.s
index 8a0e1ac471cea78..b8e8696c7abbf10 100644
--- a/llvm/test/MC/AArch64/directive-arch_extension.s
+++ b/llvm/test/MC/AArch64/directive-arch_extension.s
@@ -189,3 +189,15 @@ msr SSBS, #1
 .arch_extension tme
 tstart x0
 // CHECK: tstart x0
+
+.arch_extension fprcvt
+fcvtns s0, d1
+// CHECK: fcvtns s0, d1
+
+.arch_extension f8f16mm
+fmmla v1.8h, v2.16b, v3.16b
+// CHECK: fmmla v1.8h, v2.16b, v3.16b
+
+.arch_extension f8f32mm
+fmmla v1.4s, v2.16b, v3.16b
+// CHECK: fmmla v1.4s, v2.16b, v3.16b
diff --git a/llvm/test/MC/AArch64/directive-cpu.s b/llvm/test/MC/AArch64/directive-cpu.s
index e3d7b1cd75e5501..1a0a0bd0c5132ce 100644
--- a/llvm/test/MC/AArch64/directive-cpu.s
+++ b/llvm/test/MC/AArch64/directive-cpu.s
@@ -39,3 +39,15 @@ sha512h q0, q1, v2.2d
 .cpu generic+sm4
 sm4e v2.4s, v15.4s
 // CHECK: sm4e  v2.4s, v15.4s
+
+.cpu generic+fprcvt
+scvtf d1, s2
+// CHECK: scvtf d1, s2
+
+.cpu generic+f8f16mm
+fmmla v0.8h, v1.16b, v2.16b
+// CHECK: fmmla v0.8h, v1.16b, v2.16b
+
+.cpu generic+f8f32mm
+fmmla v0.4s, v1.16b, v2.16b
+// CHECK: fmmla v0.4s, v1.16b, v2.16b
diff --git a/llvm/test/MC/AArch64/directives-case_insensitive.s b/llvm/test/MC/AArch64/directives-case_insensitive.s
index be92e00cfad11a2..35a90a1bffea8d0 100644
--- a/llvm/test/MC/AArch64/directives-case_insensitive.s
+++ b/llvm/test/MC/AArch64/directives-case_insensitive.s
@@ -32,10 +32,12 @@ fred .REQ x5
 
 .CFI_STARTPROC
 .CFI_NEGATE_RA_STATE
+.CFI_NEGATE_RA_STATE_WITH_PC
 .CFI_B_KEY_FRAME
 .CFI_ENDPROC
 // CHECK: .cfi_startproc
 // CHECK: .cfi_negate_ra_state
+// CHECK: .cfi_negate_ra_state_with_pc
 // CHECK: .cfi_b_key_frame
 // CHECK: .cfi_endproc
 
diff --git a/llvm/test/MC/AArch64/negate_ra_state_with_pc.s b/llvm/test/MC/AArch64/negate_ra_state_with_pc.s
new file mode 100644
index 000000000000000..44b8ab2df9a908a
--- /dev/null
+++ b/llvm/test/MC/AArch64/negate_ra_state_with_pc.s
@@ -0,0 +1,7 @@
+//RUN: llvm-mc  -triple=aarch64-arm-none-eabi -o - %s | FileCheck %s
+
+// CHECK: .cfi_negate_ra_state_with_pc
+foo:
+  .cfi_startproc
+  .cfi_negate_ra_state_with_pc
+  .cfi_endproc
diff --git a/llvm/test/MC/AArch64/neon-diagnostics.s b/llvm/test/MC/AArch64/neon-diagnostics.s
index 9a0445131ddf7c6..6863a89bbe189ed 100644
--- a/llvm/test/MC/AArch64/neon-diagnostics.s
+++ b/llvm/test/MC/AArch64/neon-diagnostics.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon < %s 2> %t
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon,+fprcvt < %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
 
 //------------------------------------------------------------------------------
@@ -5176,6 +5176,32 @@
 // CHECK-ERROR:        ucvtf d21, s14, #64
 // CHECK-ERROR:                   ^
 
+//----------------------------------------------------------------------
+// Scalar Signed Integer Convert To Floating-Point
+//---------------------------------------------------------------------
+
+    scvtf d0, h0
+    scvtf s0, h0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf d0, h0
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         scvtf s0, h0
+// CHECK-ERROR:                   ^
+
+//----------------------------------------------------------------------
+// Scalar Unsigned Integer Convert To Floating-Point
+//---------------------------------------------------------------------
+
+    ucvtf d0, h0
+    ucvtf s0, h0
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ucvtf d0, h0
+// CHECK-ERROR:                   ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR:         ucvtf s0, h0
+// CHECK-ERROR:                   ^
+
 //------------------------------------------------------------------------------
 // Element reverse
 //------------------------------------------------------------------------------
@@ -6943,14 +6969,14 @@
 // With Ties To Away
 //----------------------------------------------------------------------
 
-    fcvtas s0, d0
-    fcvtas d0, s0
+    fcvtas h0, d0
+    fcvtas h0, s0
 
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtas s0, d0
+// CHECK-ERROR:        fcvtas h0, d0
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtas d0, s0
+// CHECK-ERROR:        fcvtas h0, s0
 // CHECK-ERROR:                   ^
 
 //----------------------------------------------------------------------
@@ -6958,14 +6984,14 @@
 // Nearest With Ties To Away
 //----------------------------------------------------------------------
 
-    fcvtau s0, d0
-    fcvtau d0, s0
+    fcvtau h0, d0
+    fcvtau h0, s0
 
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtau s0, d0
+// CHECK-ERROR:        fcvtau h0, d0
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtau d0, s0
+// CHECK-ERROR:        fcvtau h0, s0
 // CHECK-ERROR:                   ^
 
 //----------------------------------------------------------------------
@@ -6973,14 +6999,14 @@
 // Minus Infinity
 //----------------------------------------------------------------------
 
-    fcvtms s0, d0
-    fcvtms d0, s0
+    fcvtms h0, d0
+    fcvtms h0, s0
 
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtms s0, d0
+// CHECK-ERROR:        fcvtms h0, d0
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtms d0, s0
+// CHECK-ERROR:        fcvtms h0, s0
 // CHECK-ERROR:                   ^
 
 //----------------------------------------------------------------------
@@ -6988,14 +7014,14 @@
 // Minus Infinity
 //----------------------------------------------------------------------
 
-    fcvtmu s0, d0
-    fcvtmu d0, s0
+    fcvtmu h0, d0
+    fcvtmu h0, s0
 
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtmu s0, d0
+// CHECK-ERROR:        fcvtmu h0, d0
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtmu d0, s0
+// CHECK-ERROR:        fcvtmu h0, s0
 // CHECK-ERROR:                   ^
 
 //----------------------------------------------------------------------
@@ -7003,14 +7029,14 @@
 // With Ties To Even
 //----------------------------------------------------------------------
 
-    fcvtns s0, d0
-    fcvtns d0, s0
+    fcvtns h0, d0
+    fcvtns h0, s0
 
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtns s0, d0
+// CHECK-ERROR:        fcvtns h0, d0
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtns d0, s0
+// CHECK-ERROR:        fcvtns h0, s0
 // CHECK-ERROR:                   ^
 
 //----------------------------------------------------------------------
@@ -7018,14 +7044,14 @@
 // Nearest With Ties To Even
 //----------------------------------------------------------------------
 
-    fcvtnu s0, d0
-    fcvtnu d0, s0
+    fcvtnu h0, d0
+    fcvtnu h0, s0
 
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtnu s0, d0
+// CHECK-ERROR:        fcvtnu h0, d0
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtnu d0, s0
+// CHECK-ERROR:        fcvtnu h0, s0
 // CHECK-ERROR:                   ^
 
 //----------------------------------------------------------------------
@@ -7033,14 +7059,14 @@
 // Positive Infinity
 //----------------------------------------------------------------------
 
-    fcvtps s0, d0
-    fcvtps d0, s0
+    fcvtps h0, d0
+    fcvtps h0, s0
 
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtps s0, d0
+// CHECK-ERROR:        fcvtps h0, d0
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtps d0, s0
+// CHECK-ERROR:        fcvtps h0, s0
 // CHECK-ERROR:                   ^
 
 //----------------------------------------------------------------------
@@ -7048,28 +7074,28 @@
 // Positive Infinity
 //----------------------------------------------------------------------
 
-    fcvtpu s0, d0
-    fcvtpu d0, s0
+    fcvtpu h0, d0
+    fcvtpu h0, s0
 
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtpu s0, d0
+// CHECK-ERROR:        fcvtpu h0, d0
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtpu d0, s0
+// CHECK-ERROR:        fcvtpu h0, s0
 // CHECK-ERROR:                   ^
 
 //----------------------------------------------------------------------
 // Scalar Floating-point Convert To Signed Integer, Rounding Toward Zero
 //----------------------------------------------------------------------
 
-    fcvtzs s0, d0
-    fcvtzs d0, s0
+    fcvtzs h0, d0
+    fcvtzs h0, s0
 
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtzs s0, d0
+// CHECK-ERROR:        fcvtzs h0, d0
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtzs d0, s0
+// CHECK-ERROR:        fcvtzs h0, s0
 // CHECK-ERROR:                   ^
 
 //----------------------------------------------------------------------
@@ -7077,14 +7103,14 @@
 // Zero
 //----------------------------------------------------------------------
 
-    fcvtzu s0, d0
-    fcvtzu d0, s0
+    fcvtzu h0, d0
+    fcvtzu h0, s0
 
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtzu s0, d0
+// CHECK-ERROR:        fcvtzu h0, d0
 // CHECK-ERROR:                   ^
 // CHECK-ERROR: error: invalid operand for instruction
-// CHECK-ERROR:        fcvtzu d0, s0
+// CHECK-ERROR:        fcvtzu h0, s0
 // CHECK-ERROR:                   ^
 
 //----------------------------------------------------------------------
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc-fake16.s
new file mode 100644
index 000000000000000..faa2b1f97699971
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc-fake16.s
@@ -0,0 +1,10486 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+
+v_cmp_class_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, v255, v2
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, s1, v2
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, s105, v255
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, vcc_lo, s2
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, vcc_hi, s105
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x6b,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, ttmp15, ttmp15
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7b,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, m0, src_scc
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7d,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x7d,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x7d,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x7d,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x7d,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], v1, 0.5
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x01,0xe1,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], v255, v2
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], s1, v2
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], s105, v255
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], vcc_lo, s2
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], vcc_hi, s105
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x6b,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], ttmp15, ttmp15
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7b,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], m0, src_scc
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7d,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x7d,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x7d,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x7d,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 null, -|0xfe0b|, vcc_hi
+// GFX11: encoding: [0x7c,0x01,0x7d,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00]
+
+v_cmp_class_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x7e,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x7e,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x7e,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x7e,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x7e,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x7e,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x7e,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 null, -|0xaf123456|, vcc_hi
+// GFX11: encoding: [0x7c,0x01,0x7e,0xd4,0xff,0xd6,0x00,0x20,0x56,0x34,0x12,0xaf]
+
+v_cmp_class_f64_e64 s5, v[1:2], v2
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, v[1:2], v255
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, v[1:2], s2
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0x05,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, v[1:2], s105
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0xd3,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, v[254:255], ttmp15
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0xfe,0xf7,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, s[2:3], vcc_hi
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x02,0xd6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, s[104:105], vcc_lo
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x68,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, vcc, m0
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x6a,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, ttmp[14:15], exec_hi
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x7a,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, exec, exec_lo
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x7e,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s105, null, null
+// W32: encoding: [0x69,0x00,0x7f,0xd4,0x7c,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x7f,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 vcc_hi, 0.5, 0.5
+// W32: encoding: [0x6b,0x00,0x7f,0xd4,0xf0,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 ttmp15, -|src_scc|, src_scc
+// W32: encoding: [0x7b,0x01,0x7f,0xd4,0xfd,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], v[1:2], v2
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], v[1:2], v255
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], v[1:2], s2
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], v[1:2], s105
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xd3,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], v[254:255], ttmp15
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0xfe,0xf7,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], s[2:3], vcc_hi
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x02,0xd6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], s[104:105], vcc_lo
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x68,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], vcc, m0
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x6a,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], ttmp[14:15], exec_hi
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x7a,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], exec, exec_lo
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x7e,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], null, null
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x7c,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x7f,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 vcc, 0.5, 0.5
+// W64: encoding: [0x6a,0x00,0x7f,0xd4,0xf0,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 ttmp[14:15], -|src_scc|, src_scc
+// W64: encoding: [0x7a,0x01,0x7f,0xd4,0xfd,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 null, 0xaf123456, 0xaf123456
+// GFX11: encoding: [0x7c,0x00,0x7f,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_eq_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x02,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x02,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x02,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x02,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x02,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x02,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x02,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x02,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x02,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x02,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_eq_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x12,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x12,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x12,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x12,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x12,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x12,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x12,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x12,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x12,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x12,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x12,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_eq_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x22,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x22,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x22,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x22,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x22,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x22,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x22,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x22,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x22,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x22,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x22,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x22,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x22,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x22,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x22,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x22,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_eq_i16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x32,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x32,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x32,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x32,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x32,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x32,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x32,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x32,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 null, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x32,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_eq_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x42,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x42,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x42,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x42,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x42,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x42,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x42,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x42,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x42,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_eq_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x52,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x52,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x52,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x52,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x52,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x52,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x52,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x52,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x52,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x52,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x52,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x52,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x52,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x52,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x52,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_eq_u16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x3a,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x3a,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x3a,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x3a,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x3a,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x3a,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x3a,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 null, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x3a,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_eq_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x4a,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x4a,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x4a,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x4a,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x4a,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x4a,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x4a,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x4a,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_eq_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x5a,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x5a,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x5a,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x5a,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x5a,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x5a,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x5a,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x5a,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_f_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x00,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x00,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x00,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x00,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x00,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x00,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x00,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x00,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x00,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x00,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x00,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x00,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x00,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x00,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x00,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x00,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x00,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x00,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x00,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x00,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x00,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x00,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x00,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x00,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x00,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x00,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x00,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_f_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x10,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x10,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x10,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x10,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x10,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x10,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x10,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x10,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x10,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x10,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x10,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x10,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x10,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x10,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x10,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x10,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x10,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x10,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x10,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x10,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x10,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x10,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x10,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x10,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x10,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x10,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x10,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x10,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x10,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_f_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x20,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x20,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x20,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x20,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x20,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x20,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x20,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x20,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x20,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x20,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x20,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x20,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x20,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x20,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x20,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x20,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x20,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x20,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x20,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x20,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x20,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x20,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x20,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_f_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x40,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x40,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x40,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x40,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x40,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x40,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x40,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x40,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x40,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x40,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x40,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x40,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x40,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x40,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x40,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x40,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x40,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x40,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x40,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x40,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x40,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x40,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x40,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x40,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x40,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x40,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x40,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x40,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x40,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_f_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x50,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x50,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x50,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x50,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x50,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x50,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x50,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x50,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x50,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x50,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x50,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x50,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x50,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x50,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x50,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x50,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x50,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x50,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x50,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x50,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x50,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x50,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_i64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x50,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_f_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x48,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x48,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x48,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x48,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x48,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x48,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x48,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x48,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x48,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x48,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x48,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x48,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x48,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x48,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x48,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x48,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x48,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x48,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x48,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x48,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x48,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x48,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x48,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x48,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x48,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x48,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x48,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x48,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x48,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_f_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x58,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x58,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x58,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x58,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x58,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x58,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x58,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x58,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x58,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x58,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x58,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x58,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x58,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x58,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x58,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x58,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x58,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x58,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x58,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x58,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x58,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x58,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_f_u64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x58,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ge_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x06,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x06,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x06,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x06,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x06,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x06,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x06,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x06,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x06,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x06,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_ge_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x16,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x16,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x16,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x16,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x16,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x16,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x16,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x16,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x16,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x16,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x16,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_ge_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x26,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x26,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x26,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x26,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x26,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x26,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x26,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x26,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x26,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x26,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x26,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x26,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x26,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x26,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x26,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x26,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_ge_i16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x36,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x36,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x36,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x36,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x36,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x36,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x36,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x36,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 null, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x36,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_ge_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x46,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x46,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x46,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x46,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x46,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x46,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x46,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x46,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x46,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ge_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x56,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x56,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x56,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x56,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x56,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x56,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x56,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x56,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x56,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x56,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x56,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x56,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x56,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x56,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x56,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ge_u16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x3e,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x3e,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x3e,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x3e,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x3e,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x3e,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x3e,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 null, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x3e,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_ge_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x4e,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x4e,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x4e,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x4e,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x4e,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x4e,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x4e,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x4e,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ge_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x5e,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x5e,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x5e,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x5e,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x5e,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x5e,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x5e,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x5e,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_gt_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x04,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x04,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x04,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x04,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x04,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x04,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x04,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x04,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x04,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x04,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_gt_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x14,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x14,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x14,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x14,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x14,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x14,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x14,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x14,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x14,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x14,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x14,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_gt_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x24,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x24,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x24,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x24,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x24,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x24,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x24,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x24,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x24,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x24,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x24,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x24,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x24,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x24,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x24,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x24,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_gt_i16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x34,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x34,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x34,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x34,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x34,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x34,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x34,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x34,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 null, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x34,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_gt_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x44,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x44,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x44,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x44,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x44,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x44,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x44,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x44,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x44,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_gt_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x54,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x54,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x54,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x54,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x54,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x54,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x54,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x54,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x54,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x54,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x54,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x54,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x54,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x54,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x54,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_gt_u16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x3c,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x3c,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x3c,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x3c,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x3c,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x3c,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x3c,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 null, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x3c,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_gt_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x4c,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x4c,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x4c,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x4c,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x4c,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x4c,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x4c,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x4c,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_gt_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x5c,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x5c,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x5c,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x5c,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x5c,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x5c,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x5c,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x5c,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_le_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x03,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x03,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x03,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x03,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x03,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x03,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x03,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x03,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x03,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x03,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_le_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x13,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x13,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x13,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x13,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x13,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x13,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x13,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x13,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x13,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x13,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x13,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_le_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x23,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x23,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x23,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x23,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x23,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x23,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x23,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x23,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x23,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x23,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x23,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x23,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x23,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x23,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x23,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x23,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_le_i16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x33,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x33,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x33,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x33,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x33,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x33,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x33,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x33,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 null, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x33,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_le_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x43,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x43,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x43,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x43,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x43,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x43,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x43,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x43,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x43,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_le_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x53,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x53,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x53,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x53,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x53,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x53,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x53,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x53,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x53,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x53,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x53,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x53,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x53,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x53,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x53,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_le_u16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x3b,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x3b,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x3b,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x3b,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x3b,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x3b,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x3b,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 null, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x3b,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_le_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x4b,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x4b,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x4b,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x4b,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x4b,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x4b,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x4b,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x4b,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_le_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x5b,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x5b,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x5b,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x5b,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x5b,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x5b,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x5b,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x5b,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_lg_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x05,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x05,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x05,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x05,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x05,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x05,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x05,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x05,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x05,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x05,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_lg_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x15,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x15,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x15,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x15,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x15,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x15,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x15,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x15,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x15,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x15,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x15,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_lg_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x25,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x25,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x25,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x25,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x25,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x25,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x25,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x25,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x25,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x25,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x25,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x25,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x25,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x25,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x25,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x25,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_lt_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x01,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x01,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x01,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x01,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x01,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x01,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x01,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x01,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x01,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x01,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x01,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_lt_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x11,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x11,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x11,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x11,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x11,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x11,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x11,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x11,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x11,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x11,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x11,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_lt_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x21,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x21,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x21,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x21,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x21,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x21,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x21,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x21,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x21,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x21,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x21,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x21,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x21,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x21,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x21,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x21,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_lt_i16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x31,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x31,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x31,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x31,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x31,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x31,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x31,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x31,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 null, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x31,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_lt_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x41,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x41,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x41,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x41,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x41,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x41,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x41,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x41,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x41,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_lt_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x51,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x51,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x51,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x51,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x51,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x51,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x51,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x51,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x51,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x51,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x51,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x51,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x51,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x51,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x51,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_lt_u16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x39,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x39,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x39,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x39,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x39,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x39,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x39,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x39,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 null, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x39,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_lt_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x49,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x49,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x49,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x49,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x49,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x49,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x49,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x49,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x49,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_lt_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x59,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x59,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x59,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x59,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x59,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x59,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x59,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x59,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x59,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x59,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x59,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x59,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x59,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x59,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x59,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ne_i16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x35,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x35,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x35,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x35,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x35,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x35,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x35,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x35,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 null, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x35,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_ne_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x45,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x45,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x45,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x45,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x45,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x45,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x45,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x45,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x45,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ne_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x55,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x55,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x55,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x55,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x55,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x55,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x55,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x55,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x55,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x55,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x55,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x55,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x55,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x55,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x55,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ne_u16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x3d,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x3d,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x3d,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x3d,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x3d,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x3d,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x3d,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 null, 0xfe0b, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x3d,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_ne_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x4d,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x4d,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x4d,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x4d,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x4d,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x4d,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x4d,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x4d,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ne_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x5d,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x5d,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x5d,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x5d,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x5d,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x5d,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x5d,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x5d,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_neq_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x0d,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x0d,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x0d,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x0d,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x0d,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x0d,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x0d,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x0d,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x0d,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x0d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_neq_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x1d,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x1d,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x1d,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x1d,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x1d,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x1d,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x1d,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x1d,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x1d,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x1d,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_neq_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x2d,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x2d,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x2d,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x2d,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x2d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x2d,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x2d,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x2d,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x2d,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x2d,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x2d,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x2d,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x2d,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x2d,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x2d,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_nge_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x09,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x09,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x09,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x09,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x09,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x09,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x09,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x09,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x09,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x09,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_nge_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x19,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x19,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x19,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x19,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x19,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x19,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x19,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x19,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x19,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x19,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x19,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_nge_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x29,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x29,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x29,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x29,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x29,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x29,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x29,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x29,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x29,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x29,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x29,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x29,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x29,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x29,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x29,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x29,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_ngt_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x0b,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x0b,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x0b,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x0b,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x0b,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x0b,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x0b,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x0b,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x0b,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x0b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_ngt_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x1b,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x1b,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x1b,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x1b,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x1b,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x1b,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x1b,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x1b,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x1b,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x1b,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_ngt_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x2b,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x2b,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x2b,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x2b,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x2b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x2b,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x2b,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x2b,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x2b,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x2b,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x2b,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x2b,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x2b,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x2b,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x2b,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_nle_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x0c,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x0c,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x0c,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x0c,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x0c,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x0c,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x0c,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x0c,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x0c,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x0c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_nle_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x1c,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x1c,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x1c,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x1c,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x1c,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x1c,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x1c,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x1c,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x1c,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x1c,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_nle_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x2c,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x2c,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x2c,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x2c,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x2c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x2c,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x2c,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x2c,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x2c,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x2c,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x2c,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x2c,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x2c,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x2c,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x2c,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_nlg_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x0a,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x0a,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x0a,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x0a,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x0a,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x0a,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x0a,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x0a,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x0a,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x0a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_nlg_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x1a,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x1a,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x1a,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x1a,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x1a,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x1a,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x1a,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x1a,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x1a,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x1a,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_nlg_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x2a,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x2a,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x2a,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x2a,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x2a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x2a,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x2a,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x2a,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x2a,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x2a,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x2a,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x2a,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x2a,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x2a,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x2a,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_nlt_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x0e,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x0e,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x0e,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x0e,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x0e,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x0e,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x0e,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x0e,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x0e,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x0e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_nlt_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x1e,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x1e,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x1e,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x1e,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x1e,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x1e,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x1e,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x1e,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x1e,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x1e,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_nlt_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x2e,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x2e,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x2e,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x2e,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x2e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x2e,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x2e,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x2e,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x2e,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x2e,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x2e,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x2e,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x2e,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x2e,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x2e,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_o_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x07,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x07,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x07,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x07,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x07,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x07,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x07,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x07,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x07,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x07,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_o_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x17,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x17,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x17,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x17,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x17,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x17,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x17,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x17,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x17,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x17,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x17,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_o_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x27,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x27,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x27,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x27,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x27,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x27,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x27,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x27,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x27,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x27,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x27,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x27,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x27,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x27,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x27,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x27,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_t_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x0f,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x0f,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x0f,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x0f,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x0f,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x0f,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x0f,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x0f,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x0f,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x0f,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_t_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x1f,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x1f,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x1f,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x1f,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x1f,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x1f,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x1f,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x1f,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x1f,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x1f,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_t_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x2f,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x2f,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x2f,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x2f,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x2f,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x2f,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x2f,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x2f,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x2f,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x2f,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x2f,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x2f,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x2f,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x2f,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x2f,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_t_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x47,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x47,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x47,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x47,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x47,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x47,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x47,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x47,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x47,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x47,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x47,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x47,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x47,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x47,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x47,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x47,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x47,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x47,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x47,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x47,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x47,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x47,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x47,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x47,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x47,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x47,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x47,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x47,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x47,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_t_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x57,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x57,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x57,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x57,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x57,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x57,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x57,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x57,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x57,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x57,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x57,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x57,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x57,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x57,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x57,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x57,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x57,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x57,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x57,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x57,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x57,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x57,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_i64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x57,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_t_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x4f,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x4f,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x4f,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x4f,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x4f,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x4f,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x4f,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x4f,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x4f,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x4f,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x4f,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x4f,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x4f,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x4f,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x4f,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x4f,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x4f,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x4f,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u32_e64 null, 0xaf123456, vcc_hi
+// GFX11: encoding: [0x7c,0x00,0x4f,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_t_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x5f,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x5f,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x5f,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x5f,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x5f,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x5f,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x5f,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x5f,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x5f,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x5f,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x5f,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x5f,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x5f,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x5f,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x5f,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_t_u64_e64 null, 0xaf123456, vcc
+// GFX11: encoding: [0x7c,0x00,0x5f,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_tru_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x0f,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x0f,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x0f,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x0f,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x0f,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x0f,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x0f,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x0f,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x0f,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x0f,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x0f,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x0f,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_tru_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x1f,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x1f,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x1f,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x1f,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x1f,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x1f,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x1f,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x1f,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x1f,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x1f,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x1f,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x1f,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_tru_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x2f,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x2f,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x2f,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x2f,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x2f,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x2f,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x2f,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x2f,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x2f,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x2f,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x2f,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x2f,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x2f,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x2f,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x2f,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_tru_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x2f,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_u_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x08,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x08,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x08,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x08,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x08,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x08,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x08,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x08,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x08,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x08,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_u_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x18,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x18,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x18,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x18,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x18,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x18,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x18,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x18,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x18,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x18,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX11: encoding: [0x7c,0x83,0x18,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_u_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x28,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x28,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x28,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x28,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x28,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x28,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x28,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x28,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x28,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x28,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x28,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x28,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x28,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x28,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x28,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX11: encoding: [0x7c,0x82,0x28,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s
index d0e79c0aa3444bb..faa2b1f97699971 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_from_vopc.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_cmp_class_f16_e64 s5, v1, v2
 // W32: encoding: [0x05,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc-fake16.s
new file mode 100644
index 000000000000000..e896511e6e5c654
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc-fake16.s
@@ -0,0 +1,10948 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+
+v_cmp_class_f16_e32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0xfd,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0xfd,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, v[1:2], v2
+// W32: encoding: [0x01,0x05,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, v[254:255], v2
+// W32: encoding: [0xfe,0x05,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, s[2:3], v2
+// W32: encoding: [0x02,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, s[104:105], v2
+// W32: encoding: [0x68,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, vcc, v2
+// W32: encoding: [0x6a,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, ttmp[14:15], v2
+// W32: encoding: [0x7a,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, exec, v2
+// W32: encoding: [0x7e,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0xff,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, v[1:2], v2
+// W64: encoding: [0x01,0x05,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, v[254:255], v2
+// W64: encoding: [0xfe,0x05,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, s[2:3], v2
+// W64: encoding: [0x02,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, s[104:105], v2
+// W64: encoding: [0x68,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, vcc, v2
+// W64: encoding: [0x6a,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, ttmp[14:15], v2
+// W64: encoding: [0x7a,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, exec, v2
+// W64: encoding: [0x7e,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0xff,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x25,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x25,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x45,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x45,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x64,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x64,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x85,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x85,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xa5,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xa5,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x74,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x74,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x95,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x95,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xb5,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xb5,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x00,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x00,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x00,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x20,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x21,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x20,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x21,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x40,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x40,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x40,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x40,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x40,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x40,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x40,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x40,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x40,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x40,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x40,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x41,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x40,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x40,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x40,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x40,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x40,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x40,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x40,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x40,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x40,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x40,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x40,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x41,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x80,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x81,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x80,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x81,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xa0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xa0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xa0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xa0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xa0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xa0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xa0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xa0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xa0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xa0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xa0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xa1,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xa0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xa0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xa0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xa0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xa0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xa0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xa0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xa0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xa0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xa0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xa0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xa1,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x90,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x91,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x90,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x91,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xb0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xb0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xb0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xb0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xb0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xb0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xb0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xb0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xb0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xb0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xb0,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xb1,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xb0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xb0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xb0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xb0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xb0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xb0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xb0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xb0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xb0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xb0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xb0,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xb1,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x2d,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x2d,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x4d,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x4d,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x6c,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x6c,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x8d,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x8d,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xad,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xad,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x7c,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x7c,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x9d,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x9d,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xbd,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xbd,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x29,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x29,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x49,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x49,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x68,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x68,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x89,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x89,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xa9,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xa9,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x78,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x78,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x99,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x99,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xb9,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xb9,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x27,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x27,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x47,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x47,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x66,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x66,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x87,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x87,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xa7,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xa7,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x76,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x76,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x97,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x97,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xb7,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xb7,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x2b,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x2b,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x4b,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x4b,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x02,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x02,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x23,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x23,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x43,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x43,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x62,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x62,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x83,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x83,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xa3,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xa3,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x72,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x72,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x93,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x93,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xb3,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xb3,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x6a,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x6a,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x8b,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x8b,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xab,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xab,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x7a,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x7a,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x9b,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x9b,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xbb,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xbb,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x3b,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x3b,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x5b,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x5b,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x33,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x33,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x53,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x53,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x37,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x37,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x57,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x57,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x39,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x39,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x59,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x59,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x35,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x35,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x55,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x55,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x3d,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x3d,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x5d,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x5d,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x2f,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x2f,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x4f,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x4f,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x3f,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x3f,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x5f,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x5f,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x8e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x8f,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x8e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x8f,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xae,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xae,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xae,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xae,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xae,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xae,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xae,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xae,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xae,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xae,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xae,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xaf,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xae,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xae,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xae,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xae,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xae,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xae,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xae,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xae,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xae,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xae,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xae,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xaf,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x9e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x9f,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x9e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x9f,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xbe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xbe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xbe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xbe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xbe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xbe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xbe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xbe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xbe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xbe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xbe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xbf,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xbe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xbe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xbe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xbe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xbe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xbe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xbe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xbe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xbe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xbe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xbe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xbf,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x1e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x1e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x1e,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x3e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x3f,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x3e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x3f,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x5e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x5f,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x5e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x5f,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x31,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x31,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x51,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x51,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc.s
index 5349362b8fbaf42..05ed37c612ba386 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_cmp_class_f16_e32 vcc_lo, v1, v2
 // W32: encoding: [0x01,0x05,0xfa,0x7c]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16-fake16.s
new file mode 100644
index 000000000000000..aa315a16b9838fb
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16-fake16.s
@@ -0,0 +1,7172 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+
+v_cmp_class_f16_dpp vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0xfd,0x7c,0xff,0x6f,0x35,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0xfd,0x7c,0xff,0x6f,0x35,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x25,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x25,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x64,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x64,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x85,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x85,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x74,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x74,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x95,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x95,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x00,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x00,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x21,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x20,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x21,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x81,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x80,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x81,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x91,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x90,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x91,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x2d,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x2d,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x6c,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x6c,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x8d,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x8d,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x7c,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x7c,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x9d,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x9d,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x29,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x29,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x68,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x68,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x89,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x89,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x78,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x78,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x99,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x99,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x27,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x27,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x66,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x66,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x87,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x87,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x76,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x76,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x97,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x97,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x2b,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x2b,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x02,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x02,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x23,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x23,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x62,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x62,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x83,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x83,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x72,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x72,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x93,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x93,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x6a,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x6a,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x8b,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x8b,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x7a,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x7a,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x9b,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x9b,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x3b,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x3b,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x33,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x33,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x37,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x37,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x39,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x39,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x35,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x35,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x3d,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x3d,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x2f,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x2f,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x3f,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x3f,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x8f,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x8e,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x8f,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x9f,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x9e,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x9f,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x1e,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x1e,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x3f,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x3e,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x3f,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x31,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x31,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s
index 1299d02c3c0a535..bc77f0c1967d00c 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp16.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_cmp_class_f16_dpp vcc_lo, v1, v2 quad_perm:[3,2,1,0]
 // W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8-fake16.s
new file mode 100644
index 000000000000000..6f2f9e6704deeab
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8-fake16.s
@@ -0,0 +1,1540 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+
+v_cmp_class_f16_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0xfd,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0xfd,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x24,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x24,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x25,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x24,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x24,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x25,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x64,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x64,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x64,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x64,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x64,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x64,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x84,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x84,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x85,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x84,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x84,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x85,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x74,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x74,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x74,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x74,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x74,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x74,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x94,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x94,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x95,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x94,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x94,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x95,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x00,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x00,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x20,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x20,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x21,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x20,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x20,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x21,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x80,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x80,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x81,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x80,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x80,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x81,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x90,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x90,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x91,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x90,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x90,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_f_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x91,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x2d,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x2d,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x6c,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x6c,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x8d,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x8d,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x7c,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x7c,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x9d,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x9d,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x28,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x28,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x29,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x28,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x28,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x29,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x68,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x68,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x68,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x68,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x68,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x68,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x88,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x88,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x89,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x88,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x88,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x89,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x78,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x78,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x78,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x78,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x78,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x78,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x98,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x98,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x99,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x98,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x98,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x99,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x26,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x26,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x27,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x26,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x26,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x27,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x66,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x66,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x66,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x66,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x66,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x66,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x86,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x86,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x87,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x86,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x86,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x87,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x76,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x76,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x76,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x76,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x76,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x76,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x96,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x96,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x97,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x96,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x96,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x97,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x2b,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x2b,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x02,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x02,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x02,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x02,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x02,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x02,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x22,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x22,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x23,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x22,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x22,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x23,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x62,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x62,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x62,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x62,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x62,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x62,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x82,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x82,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x83,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x82,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x82,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x83,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x72,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x72,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x72,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x72,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x72,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x72,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x92,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x92,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x93,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x92,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x92,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x93,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x6a,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x6a,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x8b,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x8b,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x7a,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x7a,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x9b,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x9b,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x3b,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x3b,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x32,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x32,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x33,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x32,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x32,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x33,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x36,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x36,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x37,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x36,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x36,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x37,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x38,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x38,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x39,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x38,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x38,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x39,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x34,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x34,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x35,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x34,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x34,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x35,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x3d,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x3d,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x2f,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x2f,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x3f,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x3f,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x8e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x8e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x8f,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x8e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x8e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x8f,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x9e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x9e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x9f,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x9e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x9e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_t_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x9f,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x1e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x1e,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x3f,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x3e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_tru_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x3f,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x30,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x30,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x31,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x30,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x30,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x31,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s
index 9f10a29791ad1cb..1c333a0c909170b 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_dpp8.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_cmp_class_f16_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s
index 889293b1a0f2343..4b97d276d55616d 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_err.s
@@ -1,1973 +1,1973 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
 
 v_cmp_class_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_class_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_class_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:40: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:40: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
 
 v_cmp_class_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_eq_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_eq_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_eq_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_eq_i16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_eq_i16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_eq_i16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_eq_i16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_u16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_eq_u16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_eq_u16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_u16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_u16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_eq_u16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_eq_u16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_u16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_f_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_f_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
 v_cmp_f_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_f_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_f_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_f_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
 v_cmp_f_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_f_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_f_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_f_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_f_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_f_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_f_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_f_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_f_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_f_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_f_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ge_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ge_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ge_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ge_i16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ge_i16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ge_i16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ge_i16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_u16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ge_u16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ge_u16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_u16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_u16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ge_u16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ge_u16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_u16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_gt_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_gt_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_gt_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_i16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_i16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_gt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_gt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_gt_i16_e32 vcc, v127, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_i16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_gt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, vcc_hi, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16_e32 vcc, vcc_lo, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16_e32 vcc_lo, v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_gt_i16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_gt_i16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_i16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_u16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_gt_u16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_gt_u16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_u16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_u16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_gt_u16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_gt_u16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_u16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_le_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_le_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_le_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_le_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_le_i16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_le_i16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_le_i16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_le_i16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_u16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_le_u16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_le_u16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_u16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_u16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_le_u16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_le_u16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_u16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lg_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lg_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lg_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lt_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lt_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lt_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lt_i16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lt_i16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lt_i16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lt_i16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_u16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lt_u16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lt_u16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_u16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_u16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lt_u16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lt_u16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_u16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_i16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ne_i16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_ne_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ne_i16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_ne_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc, v127, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc, vcc_hi, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_i16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_i16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ne_i16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ne_i16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_i16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_u16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ne_u16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ne_u16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_u16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_u16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ne_u16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ne_u16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_u16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_neq_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_neq_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_neq_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nge_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nge_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nge_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_ngt_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_ngt_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_ngt_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nle_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nle_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nle_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nlg_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nlg_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nlg_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nlt_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nlt_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nlt_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
 v_cmp_o_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
 v_cmp_o_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_o_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_o_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_t_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:26: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_u_f16_e32 vcc, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_u_f16_e32 vcc, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_u_f16_e32 vcc, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_u_f16_e32 vcc_lo, v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_u_f16_e32 vcc_lo, v127, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_class_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_class_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_eq_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_eq_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_eq_i16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_eq_i16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_eq_u16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_eq_u16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_f_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_f_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_i16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_i16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_u16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_u16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_i16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_i16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_u16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_u16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_i16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_i16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_u16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_u16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lg_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lg_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_i16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_i16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_u16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_u16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ne_i16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ne_i16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ne_u16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ne_u16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nge_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nge_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ngt_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ngt_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nle_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nle_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nlg_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nlg_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nlt_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nlt_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_o_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_o_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_t_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_u_f16_e32 vcc_lo, v128, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_class_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_f_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_f_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_f_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_f_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_f_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_f_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_f_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_f_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_f_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_f_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc, v127, v255
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
 v_cmp_t_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_t_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_tru_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmp_eq_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc, vcc_hi, v255
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_eq_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc, vcc_lo, v255
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_eq_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc_lo, v1, v255
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_eq_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmp_f_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc_lo, v127, v255
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_ge_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_ge_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:25: error: invalid operand for instruction
 
-v_cmp_ge_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc_lo, vcc_hi, v255
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_t_f16_e32 vcc_lo, vcc_lo, v255
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_gt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc, v1, v255
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_gt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmp_gt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc, v127, v255
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_gt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:30: error: invalid operand for instruction
 
-v_cmp_le_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_le_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmp_le_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc, vcc_hi, v255
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_le_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc, vcc_lo, v255
+// GFX11: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc_lo, v1, v255
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_lg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_lt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_lt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc_lo, v127, v255
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_lt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_lt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_lt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_lt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_ne_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmp_ne_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc_lo, vcc_hi, v255
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_ne_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_tru_f16_e32 vcc_lo, vcc_lo, v255
+// GFX11: :[[@LINE-1]]:35: error: invalid operand for instruction
 
-v_cmp_ne_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_neq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_neq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_nge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v127, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:33: error: invalid operand for instruction
 
-v_cmp_ngt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nle_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_nle_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmp_nlg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, vcc_hi, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc, vcc_lo, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_nlt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_o_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v127, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_t_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
-v_cmp_t_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:36: error: invalid operand for instruction
 
-v_cmp_tru_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v128, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_tru_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
+v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
index 75f20b0c7f0c4c2..49a3f8ad63e7ec6 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
@@ -1,1973 +1,1973 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s
 
 v_cmp_class_f16 vcc, v1, v255
-// GFX11: v_cmp_class_f16_e64
+// GFX11: v_cmp_class_f16_e64 vcc, v1, v255       ; encoding: [0x6a,0x00,0x7d,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_class_f16 vcc, v127, v255
-// GFX11: v_cmp_class_f16_e64
+v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_class_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_class_f16_e64
+v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_class_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_class_f16_e64
+v_cmp_class_f16 vcc, v127, v255
+// GFX11: v_cmp_class_f16_e64 vcc, v127, v255     ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00]
 
 v_cmp_class_f16 vcc, v127, v255
-// GFX11: v_cmp_class_f16_e64
+// GFX11: v_cmp_class_f16_e64 vcc, v127, v255     ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_class_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_class_f16_e64
+v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_class_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_class_f16_e64
+v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_eq_f16 vcc, v1, v255
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_eq_f16 vcc, v127, v255
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_eq_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_class_f16 vcc, v128, v2
+// GFX11: v_cmp_class_f16_e64 vcc, v128, v2       ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_eq_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_class_f16 vcc, v128, v2
+// GFX11: v_cmp_class_f16_e64 vcc, v128, v2       ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_eq_f16 vcc, v1, v255
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_eq_f16 vcc, v127, v255
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_eq_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_eq_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_class_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_eq_i16 vcc, v1, v255
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_class_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255   ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v127, v255
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_class_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_hi, v255   ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_eq_i16 vcc, vcc_hi, v255
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_class_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255   ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_i16 vcc, vcc_lo, v255
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_class_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_class_f16_e64 vcc, vcc_lo, v255   ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v1, v255
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_eq_f16 vcc, v1, v255
+// GFX11: v_cmp_eq_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v127, v255
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_eq_f16 vcc, v1, v255
+// GFX11: v_cmp_eq_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_i16 vcc, vcc_hi, v255
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_eq_i16 vcc, vcc_lo, v255
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_eq_u16 vcc, v1, v255
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_eq_u16 vcc, v127, v255
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_eq_u16 vcc, vcc_hi, v255
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_eq_f16 vcc, v127, v255
+// GFX11: v_cmp_eq_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_eq_u16 vcc, vcc_lo, v255
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_eq_f16 vcc, v127, v255
+// GFX11: v_cmp_eq_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_eq_u16 vcc, v1, v255
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_eq_u16 vcc, v127, v255
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_eq_u16 vcc, vcc_hi, v255
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_eq_u16 vcc, vcc_lo, v255
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, v1, v255
-// GFX11: v_cmp_f_f16_e64
+v_cmp_eq_f16 vcc, v128, v2
+// GFX11: v_cmp_eq_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_f_f16 vcc, v127, v255
-// GFX11: v_cmp_f_f16_e64
+v_cmp_eq_f16 vcc, v128, v2
+// GFX11: v_cmp_eq_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_f_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_f_f16_e64
+v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_f_f16_e64
+v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, v1, v255
-// GFX11: v_cmp_f_f16_e64
+v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, v127, v255
-// GFX11: v_cmp_f_f16_e64
+v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_f_f16_e64
+v_cmp_eq_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_eq_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_f_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_f_f16_e64
+v_cmp_eq_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_eq_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v1, v255
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_eq_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_eq_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v127, v255
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_eq_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_eq_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ge_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_eq_i16 vcc, v1, v255
+// GFX11: v_cmp_eq_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x32,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ge_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_eq_i16 vcc, v1, v255
+// GFX11: v_cmp_eq_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x32,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v1, v255
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ge_f16 vcc, v127, v255
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ge_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_ge_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_ge_i16 vcc, v1, v255
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_eq_i16 vcc, v127, v255
+// GFX11: v_cmp_eq_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x32,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_ge_i16 vcc, v127, v255
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_eq_i16 vcc, v127, v255
+// GFX11: v_cmp_eq_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x32,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_ge_i16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ge_i16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ge_i16 vcc, v1, v255
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ge_i16 vcc, v127, v255
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ge_i16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_eq_i16 vcc, v128, v2
+// GFX11: v_cmp_eq_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x32,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ge_i16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_eq_i16 vcc, v128, v2
+// GFX11: v_cmp_eq_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x32,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ge_u16 vcc, v1, v255
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ge_u16 vcc, v127, v255
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ge_u16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ge_u16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ge_u16 vcc, v1, v255
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_eq_i16 vcc, vcc_hi, v255
+// GFX11: v_cmp_eq_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x32,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ge_u16 vcc, v127, v255
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_eq_i16 vcc, vcc_hi, v255
+// GFX11: v_cmp_eq_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x32,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ge_u16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_eq_i16 vcc, vcc_lo, v255
+// GFX11: v_cmp_eq_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x32,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ge_u16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_eq_i16 vcc, vcc_lo, v255
+// GFX11: v_cmp_eq_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x32,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v1, v255
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_eq_u16 vcc, v1, v255
+// GFX11: v_cmp_eq_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3a,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v127, v255
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_eq_u16 vcc, v1, v255
+// GFX11: v_cmp_eq_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3a,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_gt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, v1, v255
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_gt_f16 vcc, v127, v255
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_gt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_eq_u16 vcc, v127, v255
+// GFX11: v_cmp_eq_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3a,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_gt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_eq_u16 vcc, v127, v255
+// GFX11: v_cmp_eq_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3a,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_gt_i16 vcc, v1, v255
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_gt_i16 vcc, v127, v255
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_gt_i16 vcc, vcc_hi, v255
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_gt_i16 vcc, vcc_lo, v255
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_gt_i16 vcc, v1, v255
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_eq_u16 vcc, v128, v2
+// GFX11: v_cmp_eq_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3a,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_gt_i16 vcc, v127, v255
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_eq_u16 vcc, v128, v2
+// GFX11: v_cmp_eq_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3a,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_gt_i16 vcc, vcc_hi, v255
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_gt_i16 vcc, vcc_lo, v255
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_eq_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_gt_u16 vcc, v1, v255
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_gt_u16 vcc, v127, v255
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_eq_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_gt_u16 vcc, vcc_hi, v255
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_eq_u16 vcc, vcc_hi, v255
+// GFX11: v_cmp_eq_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3a,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_gt_u16 vcc, vcc_lo, v255
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_eq_u16 vcc, vcc_hi, v255
+// GFX11: v_cmp_eq_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3a,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_gt_u16 vcc, v1, v255
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_eq_u16 vcc, vcc_lo, v255
+// GFX11: v_cmp_eq_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3a,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_gt_u16 vcc, v127, v255
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_eq_u16 vcc, vcc_lo, v255
+// GFX11: v_cmp_eq_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3a,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_gt_u16 vcc, vcc_hi, v255
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_f_f16 vcc, v1, v255
+// GFX11: v_cmp_f_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x00,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_gt_u16 vcc, vcc_lo, v255
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_f_f16 vcc, v1, v255
+// GFX11: v_cmp_f_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x00,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_le_f16 vcc, v1, v255
-// GFX11: v_cmp_le_f16_e64
+v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_le_f16 vcc, v127, v255
-// GFX11: v_cmp_le_f16_e64
+v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_le_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_le_f16_e64
+v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_le_f16_e64
+v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, v1, v255
-// GFX11: v_cmp_le_f16_e64
+v_cmp_f_f16 vcc, v127, v255
+// GFX11: v_cmp_f_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x00,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_le_f16 vcc, v127, v255
-// GFX11: v_cmp_le_f16_e64
+v_cmp_f_f16 vcc, v127, v255
+// GFX11: v_cmp_f_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x00,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_le_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_le_f16_e64
+v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_le_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_le_f16_e64
+v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_le_i16 vcc, v1, v255
-// GFX11: v_cmp_le_i16_e64
+v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_le_i16 vcc, v127, v255
-// GFX11: v_cmp_le_i16_e64
+v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_le_i16 vcc, vcc_hi, v255
-// GFX11: v_cmp_le_i16_e64
+v_cmp_f_f16 vcc, v128, v2
+// GFX11: v_cmp_f_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x00,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_le_i16 vcc, vcc_lo, v255
-// GFX11: v_cmp_le_i16_e64
+v_cmp_f_f16 vcc, v128, v2
+// GFX11: v_cmp_f_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x00,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_le_i16 vcc, v1, v255
-// GFX11: v_cmp_le_i16_e64
+v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_le_i16 vcc, v127, v255
-// GFX11: v_cmp_le_i16_e64
+v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x00,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_le_i16 vcc, vcc_hi, v255
-// GFX11: v_cmp_le_i16_e64
+v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_le_i16 vcc, vcc_lo, v255
-// GFX11: v_cmp_le_i16_e64
+v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_f_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x00,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_le_u16 vcc, v1, v255
-// GFX11: v_cmp_le_u16_e64
+v_cmp_f_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_f_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x00,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_le_u16 vcc, v127, v255
-// GFX11: v_cmp_le_u16_e64
+v_cmp_f_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_f_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x00,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_le_u16 vcc, vcc_hi, v255
-// GFX11: v_cmp_le_u16_e64
+v_cmp_f_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_f_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x00,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_le_u16 vcc, vcc_lo, v255
-// GFX11: v_cmp_le_u16_e64
+v_cmp_f_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_f_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x00,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_le_u16 vcc, v1, v255
-// GFX11: v_cmp_le_u16_e64
+v_cmp_ge_f16 vcc, v1, v255
+// GFX11: v_cmp_ge_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_le_u16 vcc, v127, v255
-// GFX11: v_cmp_le_u16_e64
+v_cmp_ge_f16 vcc, v1, v255
+// GFX11: v_cmp_ge_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_le_u16 vcc, vcc_hi, v255
-// GFX11: v_cmp_le_u16_e64
+v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_le_u16 vcc, vcc_lo, v255
-// GFX11: v_cmp_le_u16_e64
+v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v1, v255
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, v127, v255
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_ge_f16 vcc, v127, v255
+// GFX11: v_cmp_ge_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lg_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_ge_f16 vcc, v127, v255
+// GFX11: v_cmp_ge_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lg_f16 vcc, v1, v255
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v127, v255
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_lt_f16 vcc, v1, v255
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_ge_f16 vcc, v128, v2
+// GFX11: v_cmp_ge_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_lt_f16 vcc, v127, v255
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_ge_f16 vcc, v128, v2
+// GFX11: v_cmp_ge_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_lt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_lt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_lt_f16 vcc, v1, v255
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_lt_f16 vcc, v127, v255
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_lt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_ge_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_ge_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_lt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_ge_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_ge_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_lt_i16 vcc, v1, v255
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_ge_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_ge_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_lt_i16 vcc, v127, v255
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_ge_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_ge_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_lt_i16 vcc, vcc_hi, v255
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_ge_i16 vcc, v1, v255
+// GFX11: v_cmp_ge_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x36,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_lt_i16 vcc, vcc_lo, v255
-// GFX11: v_cmp_lt_i16_e64
-
-v_cmp_lt_i16 vcc, v1, v255
-// GFX11: v_cmp_lt_i16_e64
-
-v_cmp_lt_i16 vcc, v127, v255
-// GFX11: v_cmp_lt_i16_e64
-
-v_cmp_lt_i16 vcc, vcc_hi, v255
-// GFX11: v_cmp_lt_i16_e64
-
-v_cmp_lt_i16 vcc, vcc_lo, v255
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_ge_i16 vcc, v1, v255
+// GFX11: v_cmp_ge_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x36,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_lt_u16 vcc, v1, v255
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_lt_u16 vcc, v127, v255
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_lt_u16 vcc, vcc_hi, v255
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lt_u16 vcc, vcc_lo, v255
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lt_u16 vcc, v1, v255
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_ge_i16 vcc, v127, v255
+// GFX11: v_cmp_ge_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x36,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lt_u16 vcc, v127, v255
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_ge_i16 vcc, v127, v255
+// GFX11: v_cmp_ge_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x36,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lt_u16 vcc, vcc_hi, v255
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lt_u16 vcc, vcc_lo, v255
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ne_i16 vcc, v1, v255
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ne_i16 vcc, v127, v255
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ne_i16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_ge_i16 vcc, v128, v2
+// GFX11: v_cmp_ge_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x36,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ne_i16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_ge_i16 vcc, v128, v2
+// GFX11: v_cmp_ge_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x36,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ne_i16 vcc, v1, v255
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ne_i16 vcc, v127, v255
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ne_i16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ne_i16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ne_u16 vcc, v1, v255
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_ge_i16 vcc, vcc_hi, v255
+// GFX11: v_cmp_ge_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x36,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ne_u16 vcc, v127, v255
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_ge_i16 vcc, vcc_hi, v255
+// GFX11: v_cmp_ge_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x36,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ne_u16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_ge_i16 vcc, vcc_lo, v255
+// GFX11: v_cmp_ge_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x36,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ne_u16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_ge_i16 vcc, vcc_lo, v255
+// GFX11: v_cmp_ge_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x36,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ne_u16 vcc, v1, v255
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_ge_u16 vcc, v1, v255
+// GFX11: v_cmp_ge_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3e,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ne_u16 vcc, v127, v255
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_ge_u16 vcc, v1, v255
+// GFX11: v_cmp_ge_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3e,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ne_u16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ne_u16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, v1, v255
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, v127, v255
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_ge_u16 vcc, v127, v255
+// GFX11: v_cmp_ge_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3e,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_ge_u16 vcc, v127, v255
+// GFX11: v_cmp_ge_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3e,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, v1, v255
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, v127, v255
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v1, v255
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_ge_u16 vcc, v128, v2
+// GFX11: v_cmp_ge_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3e,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nge_f16 vcc, v127, v255
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_ge_u16 vcc, v128, v2
+// GFX11: v_cmp_ge_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3e,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nge_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nge_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nge_f16 vcc, v1, v255
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v127, v255
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ge_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_ge_u16 vcc, vcc_hi, v255
+// GFX11: v_cmp_ge_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3e,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nge_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_ge_u16 vcc, vcc_hi, v255
+// GFX11: v_cmp_ge_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3e,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v1, v255
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_ge_u16 vcc, vcc_lo, v255
+// GFX11: v_cmp_ge_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3e,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v127, v255
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_ge_u16 vcc, vcc_lo, v255
+// GFX11: v_cmp_ge_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3e,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_gt_f16 vcc, v1, v255
+// GFX11: v_cmp_gt_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_gt_f16 vcc, v1, v255
+// GFX11: v_cmp_gt_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v1, v255
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ngt_f16 vcc, v127, v255
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ngt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_ngt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nle_f16 vcc, v1, v255
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_gt_f16 vcc, v127, v255
+// GFX11: v_cmp_gt_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_nle_f16 vcc, v127, v255
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_gt_f16 vcc, v127, v255
+// GFX11: v_cmp_gt_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_nle_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_nle_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_nle_f16 vcc, v1, v255
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nle_f16 vcc, v127, v255
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nle_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_gt_f16 vcc, v128, v2
+// GFX11: v_cmp_gt_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nle_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_gt_f16 vcc, v128, v2
+// GFX11: v_cmp_gt_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nlg_f16 vcc, v1, v255
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nlg_f16 vcc, v127, v255
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nlg_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nlg_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nlg_f16 vcc, v1, v255
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_gt_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_gt_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v127, v255
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_gt_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_gt_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_gt_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_gt_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_gt_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_gt_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, v1, v255
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_gt_i16 vcc, v1, v255
+// GFX11: v_cmp_gt_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x34,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, v127, v255
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_gt_i16 vcc, v1, v255
+// GFX11: v_cmp_gt_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x34,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlt_f16 vcc, v1, v255
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nlt_f16 vcc, v127, v255
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nlt_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_gt_i16 vcc, v127, v255
+// GFX11: v_cmp_gt_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x34,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_gt_i16 vcc, v127, v255
+// GFX11: v_cmp_gt_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x34,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, v1, v255
-// GFX11: v_cmp_o_f16_e64
+v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, v127, v255
-// GFX11: v_cmp_o_f16_e64
+v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_o_f16_e64
+v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_o_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_o_f16_e64
+v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_o_f16 vcc, v1, v255
-// GFX11: v_cmp_o_f16_e64
+v_cmp_gt_i16 vcc, v128, v2
+// GFX11: v_cmp_gt_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x34,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_o_f16 vcc, v127, v255
-// GFX11: v_cmp_o_f16_e64
+v_cmp_gt_i16 vcc, v128, v2
+// GFX11: v_cmp_gt_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x34,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_o_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_o_f16_e64
+v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_o_f16_e64
+v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_t_f16 vcc, v1, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, v127, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_i16 vcc, vcc_hi, v255
+// GFX11: v_cmp_gt_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x34,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_t_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_i16 vcc, vcc_hi, v255
+// GFX11: v_cmp_gt_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x34,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_t_f16 vcc, v1, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_i16 vcc, vcc_lo, v255
+// GFX11: v_cmp_gt_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x34,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_t_f16 vcc, v127, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_i16 vcc, vcc_lo, v255
+// GFX11: v_cmp_gt_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x34,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_t_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_u16 vcc, v1, v255
+// GFX11: v_cmp_gt_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3c,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_t_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_u16 vcc, v1, v255
+// GFX11: v_cmp_gt_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3c,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_tru_f16 vcc, v1, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, v127, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_tru_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_tru_f16 vcc, v1, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_u16 vcc, v127, v255
+// GFX11: v_cmp_gt_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3c,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_tru_f16 vcc, v127, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_u16 vcc, v127, v255
+// GFX11: v_cmp_gt_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3c,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_tru_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_t_f16_e64
+v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_u_f16 vcc, v1, v255
-// GFX11: v_cmp_u_f16_e64
+v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, v127, v255
-// GFX11: v_cmp_u_f16_e64
+v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_u_f16_e64
+v_cmp_gt_u16 vcc, v128, v2
+// GFX11: v_cmp_gt_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3c,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_u_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_u_f16_e64
+v_cmp_gt_u16 vcc, v128, v2
+// GFX11: v_cmp_gt_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3c,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_u_f16 vcc, v1, v255
-// GFX11: v_cmp_u_f16_e64
+v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_u_f16 vcc, v127, v255
-// GFX11: v_cmp_u_f16_e64
+v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_gt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_u_f16 vcc, vcc_hi, v255
-// GFX11: v_cmp_u_f16_e64
+v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, vcc_lo, v255
-// GFX11: v_cmp_u_f16_e64
+v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_gt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_class_f16 vcc, v128, v2
-// GFX11: v_cmp_class_f16_e64
+v_cmp_gt_u16 vcc, vcc_hi, v255
+// GFX11: v_cmp_gt_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3c,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_class_f16 vcc, v128, v2
-// GFX11: v_cmp_class_f16_e64
+v_cmp_gt_u16 vcc, vcc_hi, v255
+// GFX11: v_cmp_gt_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3c,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v128, v2
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_gt_u16 vcc, vcc_lo, v255
+// GFX11: v_cmp_gt_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3c,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v128, v2
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_gt_u16 vcc, vcc_lo, v255
+// GFX11: v_cmp_gt_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3c,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v128, v2
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_le_f16 vcc, v1, v255
+// GFX11: v_cmp_le_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v128, v2
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_le_f16 vcc, v1, v255
+// GFX11: v_cmp_le_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_u16 vcc, v128, v2
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_eq_u16 vcc, v128, v2
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, v128, v2
-// GFX11: v_cmp_f_f16_e64
+v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, v128, v2
-// GFX11: v_cmp_f_f16_e64
+v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_ge_f16 vcc, v128, v2
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_le_f16 vcc, v127, v255
+// GFX11: v_cmp_le_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v128, v2
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_le_f16 vcc, v127, v255
+// GFX11: v_cmp_le_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_ge_i16 vcc, v128, v2
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ge_i16 vcc, v128, v2
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ge_u16 vcc, v128, v2
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ge_u16 vcc, v128, v2
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_gt_f16 vcc, v128, v2
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_le_f16 vcc, v128, v2
+// GFX11: v_cmp_le_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_gt_f16 vcc, v128, v2
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_le_f16 vcc, v128, v2
+// GFX11: v_cmp_le_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_gt_i16 vcc, v128, v2
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_gt_i16 vcc, v128, v2
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_gt_u16 vcc, v128, v2
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_gt_u16 vcc, v128, v2
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, v128, v2
-// GFX11: v_cmp_le_f16_e64
+v_cmp_le_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_le_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_le_f16 vcc, v128, v2
-// GFX11: v_cmp_le_f16_e64
+v_cmp_le_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_le_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_le_i16 vcc, v128, v2
-// GFX11: v_cmp_le_i16_e64
+v_cmp_le_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_le_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_le_i16 vcc, v128, v2
-// GFX11: v_cmp_le_i16_e64
+v_cmp_le_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_le_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_le_u16 vcc, v128, v2
-// GFX11: v_cmp_le_u16_e64
+v_cmp_le_i16 vcc, v1, v255
+// GFX11: v_cmp_le_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_le_u16 vcc, v128, v2
-// GFX11: v_cmp_le_u16_e64
+v_cmp_le_i16 vcc, v1, v255
+// GFX11: v_cmp_le_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_lg_f16 vcc, v128, v2
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v128, v2
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_lt_f16 vcc, v128, v2
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lt_f16 vcc, v128, v2
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lt_i16 vcc, v128, v2
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_le_i16 vcc, v127, v255
+// GFX11: v_cmp_le_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x33,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lt_i16 vcc, v128, v2
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_le_i16 vcc, v127, v255
+// GFX11: v_cmp_le_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x33,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lt_u16 vcc, v128, v2
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lt_u16 vcc, v128, v2
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ne_i16 vcc, v128, v2
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ne_i16 vcc, v128, v2
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ne_u16 vcc, v128, v2
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_le_i16 vcc, v128, v2
+// GFX11: v_cmp_le_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x33,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ne_u16 vcc, v128, v2
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_le_i16 vcc, v128, v2
+// GFX11: v_cmp_le_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x33,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_neq_f16 vcc, v128, v2
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, v128, v2
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nge_f16 vcc, v128, v2
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v128, v2
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ngt_f16 vcc, v128, v2
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_le_i16 vcc, vcc_hi, v255
+// GFX11: v_cmp_le_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x33,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v128, v2
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_le_i16 vcc, vcc_hi, v255
+// GFX11: v_cmp_le_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x33,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nle_f16 vcc, v128, v2
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_le_i16 vcc, vcc_lo, v255
+// GFX11: v_cmp_le_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x33,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nle_f16 vcc, v128, v2
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_le_i16 vcc, vcc_lo, v255
+// GFX11: v_cmp_le_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x33,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v128, v2
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_le_u16 vcc, v1, v255
+// GFX11: v_cmp_le_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3b,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v128, v2
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_le_u16 vcc, v1, v255
+// GFX11: v_cmp_le_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3b,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, v128, v2
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlt_f16 vcc, v128, v2
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, v128, v2
-// GFX11: v_cmp_o_f16_e64
+v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_o_f16 vcc, v128, v2
-// GFX11: v_cmp_o_f16_e64
+v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, v128, v2
-// GFX11: v_cmp_t_f16_e64
+v_cmp_le_u16 vcc, v127, v255
+// GFX11: v_cmp_le_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3b,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_t_f16 vcc, v128, v2
-// GFX11: v_cmp_t_f16_e64
+v_cmp_le_u16 vcc, v127, v255
+// GFX11: v_cmp_le_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3b,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_tru_f16 vcc, v128, v2
-// GFX11: v_cmp_t_f16_e64
+v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, v128, v2
-// GFX11: v_cmp_t_f16_e64
+v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_u_f16 vcc, v128, v2
-// GFX11: v_cmp_u_f16_e64
+v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, v128, v2
-// GFX11: v_cmp_u_f16_e64
+v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_class_f16_e64
+v_cmp_le_u16 vcc, v128, v2
+// GFX11: v_cmp_le_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3b,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_class_f16_e64
+v_cmp_le_u16 vcc, v128, v2
+// GFX11: v_cmp_le_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3b,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_class_f16_e64
+v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_le_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_le_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_le_u16 vcc, vcc_hi, v255
+// GFX11: v_cmp_le_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3b,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_le_u16 vcc, vcc_hi, v255
+// GFX11: v_cmp_le_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3b,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_le_u16 vcc, vcc_lo, v255
+// GFX11: v_cmp_le_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3b,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_le_u16 vcc, vcc_lo, v255
+// GFX11: v_cmp_le_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3b,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_lg_f16 vcc, v1, v255
+// GFX11: v_cmp_lg_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_lg_f16 vcc, v1, v255
+// GFX11: v_cmp_lg_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_f_f16_e64
+v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_f_f16_e64
+v_cmp_lg_f16 vcc, v127, v255
+// GFX11: v_cmp_lg_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_f_f16_e64
+v_cmp_lg_f16 vcc, v127, v255
+// GFX11: v_cmp_lg_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_f_f16_e64
+v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_lg_f16 vcc, v128, v2
+// GFX11: v_cmp_lg_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_lg_f16 vcc, v128, v2
+// GFX11: v_cmp_lg_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_lg_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_lg_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_lg_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_lg_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_lg_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_lg_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_lg_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_lg_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_lt_f16 vcc, v1, v255
+// GFX11: v_cmp_lt_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x01,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_lt_f16 vcc, v1, v255
+// GFX11: v_cmp_lt_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x01,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_lt_f16 vcc, v127, v255
+// GFX11: v_cmp_lt_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x01,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_lt_f16 vcc, v127, v255
+// GFX11: v_cmp_lt_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x01,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_f16_e64
+v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_f16_e64
+v_cmp_lt_f16 vcc, v128, v2
+// GFX11: v_cmp_lt_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x01,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_f16_e64
+v_cmp_lt_f16 vcc, v128, v2
+// GFX11: v_cmp_lt_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x01,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_f16_e64
+v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_i16_e64
+v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_i16_e64
+v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_i16_e64
+v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_i16_e64
+v_cmp_lt_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_lt_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x01,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_u16_e64
+v_cmp_lt_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_lt_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x01,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_u16_e64
+v_cmp_lt_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_lt_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x01,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_u16_e64
+v_cmp_lt_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_lt_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x01,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_u16_e64
+v_cmp_lt_i16 vcc, v1, v255
+// GFX11: v_cmp_lt_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x31,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_lt_i16 vcc, v1, v255
+// GFX11: v_cmp_lt_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x31,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_lt_i16 vcc, v127, v255
+// GFX11: v_cmp_lt_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x31,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_lt_i16 vcc, v127, v255
+// GFX11: v_cmp_lt_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x31,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
 v_cmp_lt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_i16_e64
-
-v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_i16_e64
+// GFX11: v_cmp_lt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
 v_cmp_lt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_i16_e64
+// GFX11: v_cmp_lt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_lt_i16 vcc, v128, v2
+// GFX11: v_cmp_lt_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x31,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_lt_i16 vcc, v128, v2
+// GFX11: v_cmp_lt_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x31,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_lt_i16 vcc, vcc_hi, v255
+// GFX11: v_cmp_lt_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x31,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_lt_i16 vcc, vcc_hi, v255
+// GFX11: v_cmp_lt_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x31,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_lt_i16 vcc, vcc_lo, v255
+// GFX11: v_cmp_lt_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x31,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_lt_i16 vcc, vcc_lo, v255
+// GFX11: v_cmp_lt_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x31,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_lt_u16 vcc, v1, v255
+// GFX11: v_cmp_lt_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x39,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_lt_u16 vcc, v1, v255
+// GFX11: v_cmp_lt_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x39,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_lt_u16 vcc, v127, v255
+// GFX11: v_cmp_lt_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x39,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_lt_u16 vcc, v127, v255
+// GFX11: v_cmp_lt_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x39,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_lt_u16 vcc, v128, v2
+// GFX11: v_cmp_lt_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x39,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_lt_u16 vcc, v128, v2
+// GFX11: v_cmp_lt_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x39,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_lt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_lt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_lt_u16 vcc, vcc_hi, v255
+// GFX11: v_cmp_lt_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x39,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_lt_u16 vcc, vcc_hi, v255
+// GFX11: v_cmp_lt_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x39,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_lt_u16 vcc, vcc_lo, v255
+// GFX11: v_cmp_lt_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x39,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_lt_u16 vcc, vcc_lo, v255
+// GFX11: v_cmp_lt_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x39,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_ne_i16 vcc, v1, v255
+// GFX11: v_cmp_ne_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x35,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_ne_i16 vcc, v1, v255
+// GFX11: v_cmp_ne_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x35,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ne_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ne_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_o_f16_e64
+v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ne_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_o_f16_e64
+v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ne_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_o_f16_e64
+v_cmp_ne_i16 vcc, v127, v255
+// GFX11: v_cmp_ne_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x35,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_o_f16_e64
+v_cmp_ne_i16 vcc, v127, v255
+// GFX11: v_cmp_ne_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x35,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ne_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ne_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ne_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ne_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_ne_i16 vcc, v128, v2
+// GFX11: v_cmp_ne_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x35,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_ne_i16 vcc, v128, v2
+// GFX11: v_cmp_ne_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x35,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ne_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ne_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_u_f16_e64
+v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ne_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_u_f16_e64
+v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ne_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_u_f16_e64
+v_cmp_ne_i16 vcc, vcc_hi, v255
+// GFX11: v_cmp_ne_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x35,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_u_f16_e64
+v_cmp_ne_i16 vcc, vcc_hi, v255
+// GFX11: v_cmp_ne_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x35,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_class_f16_e64
+v_cmp_ne_i16 vcc, vcc_lo, v255
+// GFX11: v_cmp_ne_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x35,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_class_f16_e64
+v_cmp_ne_i16 vcc, vcc_lo, v255
+// GFX11: v_cmp_ne_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x35,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_ne_u16 vcc, v1, v255
+// GFX11: v_cmp_ne_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3d,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_ne_u16 vcc, v1, v255
+// GFX11: v_cmp_ne_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3d,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ne_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ne_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ne_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ne_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_f_f16_e64
+v_cmp_ne_u16 vcc, v127, v255
+// GFX11: v_cmp_ne_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3d,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_f_f16_e64
+v_cmp_ne_u16 vcc, v127, v255
+// GFX11: v_cmp_ne_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3d,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ne_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ne_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ne_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ne_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_ne_u16 vcc, v128, v2
+// GFX11: v_cmp_ne_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3d,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_ne_u16 vcc, v128, v2
+// GFX11: v_cmp_ne_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3d,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ne_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ne_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ne_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ne_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_ne_u16 vcc, vcc_hi, v255
+// GFX11: v_cmp_ne_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3d,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_ne_u16 vcc, vcc_hi, v255
+// GFX11: v_cmp_ne_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3d,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_f16_e64
+v_cmp_ne_u16 vcc, vcc_lo, v255
+// GFX11: v_cmp_ne_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3d,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_f16_e64
+v_cmp_ne_u16 vcc, vcc_lo, v255
+// GFX11: v_cmp_ne_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3d,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_i16_e64
+v_cmp_neq_f16 vcc, v1, v255
+// GFX11: v_cmp_neq_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_i16_e64
+v_cmp_neq_f16 vcc, v1, v255
+// GFX11: v_cmp_neq_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_u16_e64
+v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_le_u16_e64
+v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_neq_f16 vcc, v127, v255
+// GFX11: v_cmp_neq_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_neq_f16 vcc, v127, v255
+// GFX11: v_cmp_neq_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_neq_f16 vcc, v128, v2
+// GFX11: v_cmp_neq_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_neq_f16 vcc, v128, v2
+// GFX11: v_cmp_neq_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
 v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
 v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64
+// GFX11: v_cmp_neq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_neq_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_neq_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_neq_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_neq_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_neq_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_neq_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_neq_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_neq_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_nge_f16 vcc, v1, v255
+// GFX11: v_cmp_nge_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_nge_f16 vcc, v1, v255
+// GFX11: v_cmp_nge_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_o_f16_e64
+v_cmp_nge_f16 vcc, v127, v255
+// GFX11: v_cmp_nge_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_o_f16_e64
+v_cmp_nge_f16 vcc, v127, v255
+// GFX11: v_cmp_nge_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_u_f16_e64
+v_cmp_nge_f16 vcc, v128, v2
+// GFX11: v_cmp_nge_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmp_u_f16_e64
+v_cmp_nge_f16 vcc, v128, v2
+// GFX11: v_cmp_nge_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_class_f16_e64
+v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_class_f16_e64
+v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_class_f16_e64
+v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_nge_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_nge_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_nge_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_nge_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_nge_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_nge_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_nge_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_nge_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_ngt_f16 vcc, v1, v255
+// GFX11: v_cmp_ngt_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_ngt_f16 vcc, v1, v255
+// GFX11: v_cmp_ngt_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_ngt_f16 vcc, v127, v255
+// GFX11: v_cmp_ngt_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_f_f16_e64
+v_cmp_ngt_f16 vcc, v127, v255
+// GFX11: v_cmp_ngt_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_f_f16_e64
+v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_f_f16_e64
+v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_f_f16_e64
+v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_ngt_f16 vcc, v128, v2
+// GFX11: v_cmp_ngt_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_ngt_f16 vcc, v128, v2
+// GFX11: v_cmp_ngt_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_ngt_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_ngt_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_ngt_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_ngt_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_nle_f16 vcc, v1, v255
+// GFX11: v_cmp_nle_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_nle_f16 vcc, v1, v255
+// GFX11: v_cmp_nle_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_nle_f16 vcc, v127, v255
+// GFX11: v_cmp_nle_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_nle_f16 vcc, v127, v255
+// GFX11: v_cmp_nle_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_nle_f16 vcc, v128, v2
+// GFX11: v_cmp_nle_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_f16_e64
+v_cmp_nle_f16 vcc, v128, v2
+// GFX11: v_cmp_nle_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_f16_e64
+v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_f16_e64
+v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_f16_e64
+v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_i16_e64
+v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nle_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_i16_e64
+v_cmp_nle_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_nle_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_i16_e64
+v_cmp_nle_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_nle_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_i16_e64
+v_cmp_nle_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_nle_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_u16_e64
+v_cmp_nle_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_nle_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_u16_e64
+v_cmp_nlg_f16 vcc, v1, v255
+// GFX11: v_cmp_nlg_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_u16_e64
+v_cmp_nlg_f16 vcc, v1, v255
+// GFX11: v_cmp_nlg_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_u16_e64
+v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_nlg_f16 vcc, v127, v255
+// GFX11: v_cmp_nlg_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_nlg_f16 vcc, v127, v255
+// GFX11: v_cmp_nlg_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_nlg_f16 vcc, v128, v2
+// GFX11: v_cmp_nlg_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_nlg_f16 vcc, v128, v2
+// GFX11: v_cmp_nlg_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_nlg_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_nlg_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_nlg_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_nlg_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_nlt_f16 vcc, v1, v255
+// GFX11: v_cmp_nlt_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_nlt_f16 vcc, v1, v255
+// GFX11: v_cmp_nlt_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_nlt_f16 vcc, v127, v255
+// GFX11: v_cmp_nlt_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_nlt_f16 vcc, v127, v255
+// GFX11: v_cmp_nlt_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_nlt_f16 vcc, v128, v2
+// GFX11: v_cmp_nlt_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_nlt_f16 vcc, v128, v2
+// GFX11: v_cmp_nlt_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_nlt_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_nlt_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_nlt_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_nlt_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_o_f16 vcc, v1, v255
+// GFX11: v_cmp_o_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_o_f16 vcc, v1, v255
+// GFX11: v_cmp_o_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_o_f16 vcc, v127, v255
+// GFX11: v_cmp_o_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_o_f16_e64
+v_cmp_o_f16 vcc, v127, v255
+// GFX11: v_cmp_o_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00]
 
 v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_o_f16_e64
-
-v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_o_f16_e64
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
 v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_o_f16_e64
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_o_f16 vcc, v128, v2
+// GFX11: v_cmp_o_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_o_f16 vcc, v128, v2
+// GFX11: v_cmp_o_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_o_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_u_f16_e64
+v_cmp_o_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_o_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_u_f16_e64
+v_cmp_o_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_o_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_u_f16_e64
+v_cmp_o_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_o_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_u_f16_e64
+v_cmp_o_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_o_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_class_f16_e64
+v_cmp_t_f16 vcc, v1, v255
+// GFX11: v_cmp_t_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_class_f16_e64
+v_cmp_t_f16 vcc, v1, v255
+// GFX11: v_cmp_t_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_f16_e64
+v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_i16_e64
+v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_t_f16 vcc, v127, v255
+// GFX11: v_cmp_t_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_eq_u16_e64
+v_cmp_t_f16 vcc, v127, v255
+// GFX11: v_cmp_t_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_f_f16_e64
+v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_f_f16_e64
+v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_f16_e64
+v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_t_f16 vcc, v128, v2
+// GFX11: v_cmp_t_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_i16_e64
+v_cmp_t_f16 vcc, v128, v2
+// GFX11: v_cmp_t_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ge_u16_e64
+v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_f16_e64
+v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_t_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_i16_e64
+v_cmp_t_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_t_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_gt_u16_e64
+v_cmp_t_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_f16_e64
+v_cmp_tru_f16 vcc, v1, v255
+// GFX11: v_cmp_t_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_f16_e64
+v_cmp_tru_f16 vcc, v1, v255
+// GFX11: v_cmp_t_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x0f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_i16_e64
+v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_i16_e64
+v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_u16_e64
+v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_le_u16_e64
+v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_tru_f16 vcc, v127, v255
+// GFX11: v_cmp_t_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lg_f16_e64
+v_cmp_tru_f16 vcc, v127, v255
+// GFX11: v_cmp_t_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x0f,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_f16_e64
+v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_i16_e64
+v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_tru_f16 vcc, v128, v2
+// GFX11: v_cmp_t_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_lt_u16_e64
+v_cmp_tru_f16 vcc, v128, v2
+// GFX11: v_cmp_t_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x0f,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ne_i16_e64
+v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0f,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
-v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ne_u16_e64
+v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_t_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0f,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
 
-v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_tru_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_neq_f16_e64
+v_cmp_tru_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6b,0xfe,0x03,0x00]
 
-v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_tru_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nge_f16_e64
+v_cmp_tru_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_t_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x0f,0xd4,0x6a,0xfe,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_u_f16 vcc, v1, v255
+// GFX11: v_cmp_u_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_ngt_f16_e64
+v_cmp_u_f16 vcc, v1, v255
+// GFX11: v_cmp_u_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nle_f16_e64
+v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlg_f16_e64
+v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_u_f16 vcc, v127, v255
+// GFX11: v_cmp_u_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_nlt_f16_e64
+v_cmp_u_f16 vcc, v127, v255
+// GFX11: v_cmp_u_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00]
 
-v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_o_f16_e64
+v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_o_f16_e64
+v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
 
-v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
 
-v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_u_f16 vcc, v128, v2
+// GFX11: v_cmp_u_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00]
 
-v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_t_f16_e64
+v_cmp_u_f16 vcc, v128, v2
+// GFX11: v_cmp_u_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00]
 
 v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_u_f16_e64
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
 
 v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmp_u_f16_e64
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+
+v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmp_u_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+
+v_cmp_u_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_u_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_u_f16 vcc, vcc_hi, v255
+// GFX11: v_cmp_u_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00]
+
+v_cmp_u_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_u_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00]
 
+v_cmp_u_f16 vcc, vcc_lo, v255
+// GFX11: v_cmp_u_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx-fake16.s
new file mode 100644
index 000000000000000..33a5e7c140b5f2e
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx-fake16.s
@@ -0,0 +1,4106 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+
+v_cmpx_class_f16_e32 v1, v2
+// GFX11: encoding: [0x01,0x05,0xfa,0x7d]
+
+v_cmpx_class_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0xfa,0x7d]
+
+v_cmpx_class_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0xfa,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_class_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0xfc,0x7d]
+
+v_cmpx_class_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0xfc,0x7d]
+
+v_cmpx_class_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0xfd,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_class_f64 v[1:2], v2
+// GFX11: encoding: [0x01,0x05,0xfe,0x7d]
+
+v_cmpx_class_f64 v[254:255], v2
+// GFX11: encoding: [0xfe,0x05,0xfe,0x7d]
+
+v_cmpx_class_f64 s[2:3], v2
+// GFX11: encoding: [0x02,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 s[104:105], v2
+// GFX11: encoding: [0x68,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 vcc, v2
+// GFX11: encoding: [0x6a,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 ttmp[14:15], v2
+// GFX11: encoding: [0x7a,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 exec, v2
+// GFX11: encoding: [0x7e,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 null, v2
+// GFX11: encoding: [0x7c,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 -1, v2
+// GFX11: encoding: [0xc1,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0xff,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x04,0x7d]
+
+v_cmpx_eq_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x04,0x7d]
+
+v_cmpx_eq_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x04,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_eq_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x24,0x7d]
+
+v_cmpx_eq_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x24,0x7d]
+
+v_cmpx_eq_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x25,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x44,0x7d]
+
+v_cmpx_eq_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x44,0x7d]
+
+v_cmpx_eq_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x45,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_i16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x64,0x7d]
+
+v_cmpx_eq_i16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x64,0x7d]
+
+v_cmpx_eq_i16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x64,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_eq_i32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x84,0x7d]
+
+v_cmpx_eq_i32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x84,0x7d]
+
+v_cmpx_eq_i32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x85,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_i64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xa4,0x7d]
+
+v_cmpx_eq_i64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xa4,0x7d]
+
+v_cmpx_eq_i64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xa5,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_u16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x74,0x7d]
+
+v_cmpx_eq_u16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x74,0x7d]
+
+v_cmpx_eq_u16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x74,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_eq_u32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x94,0x7d]
+
+v_cmpx_eq_u32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x94,0x7d]
+
+v_cmpx_eq_u32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x95,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_u64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xb4,0x7d]
+
+v_cmpx_eq_u64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xb4,0x7d]
+
+v_cmpx_eq_u64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xb5,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_f_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x00,0x7d]
+
+v_cmpx_f_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x00,0x7d]
+
+v_cmpx_f_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x00,0x7d]
+
+v_cmpx_f_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x00,0x7d]
+
+v_cmpx_f_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x00,0x7d]
+
+v_cmpx_f_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x00,0x7d]
+
+v_cmpx_f_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x00,0x7d]
+
+v_cmpx_f_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x00,0x7d]
+
+v_cmpx_f_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x00,0x7d]
+
+v_cmpx_f_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x00,0x7d]
+
+v_cmpx_f_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x00,0x7d]
+
+v_cmpx_f_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x00,0x7d]
+
+v_cmpx_f_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x00,0x7d]
+
+v_cmpx_f_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x00,0x7d]
+
+v_cmpx_f_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x00,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_f_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x20,0x7d]
+
+v_cmpx_f_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x20,0x7d]
+
+v_cmpx_f_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x20,0x7d]
+
+v_cmpx_f_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x20,0x7d]
+
+v_cmpx_f_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x20,0x7d]
+
+v_cmpx_f_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x20,0x7d]
+
+v_cmpx_f_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x20,0x7d]
+
+v_cmpx_f_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x20,0x7d]
+
+v_cmpx_f_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x20,0x7d]
+
+v_cmpx_f_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x20,0x7d]
+
+v_cmpx_f_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x20,0x7d]
+
+v_cmpx_f_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x20,0x7d]
+
+v_cmpx_f_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x20,0x7d]
+
+v_cmpx_f_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x20,0x7d]
+
+v_cmpx_f_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x21,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_f_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x40,0x7d]
+
+v_cmpx_f_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x40,0x7d]
+
+v_cmpx_f_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x40,0x7d]
+
+v_cmpx_f_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x40,0x7d]
+
+v_cmpx_f_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x40,0x7d]
+
+v_cmpx_f_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x40,0x7d]
+
+v_cmpx_f_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x40,0x7d]
+
+v_cmpx_f_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x40,0x7d]
+
+v_cmpx_f_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x40,0x7d]
+
+v_cmpx_f_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x40,0x7d]
+
+v_cmpx_f_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x40,0x7d]
+
+v_cmpx_f_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x41,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_f_i32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x80,0x7d]
+
+v_cmpx_f_i32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x80,0x7d]
+
+v_cmpx_f_i32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x80,0x7d]
+
+v_cmpx_f_i32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x80,0x7d]
+
+v_cmpx_f_i32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x80,0x7d]
+
+v_cmpx_f_i32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x80,0x7d]
+
+v_cmpx_f_i32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x80,0x7d]
+
+v_cmpx_f_i32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x80,0x7d]
+
+v_cmpx_f_i32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x80,0x7d]
+
+v_cmpx_f_i32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x80,0x7d]
+
+v_cmpx_f_i32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x80,0x7d]
+
+v_cmpx_f_i32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x80,0x7d]
+
+v_cmpx_f_i32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x80,0x7d]
+
+v_cmpx_f_i32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x80,0x7d]
+
+v_cmpx_f_i32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x81,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_f_i64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xa0,0x7d]
+
+v_cmpx_f_i64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xa0,0x7d]
+
+v_cmpx_f_i64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xa0,0x7d]
+
+v_cmpx_f_i64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xa0,0x7d]
+
+v_cmpx_f_i64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xa0,0x7d]
+
+v_cmpx_f_i64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xa0,0x7d]
+
+v_cmpx_f_i64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xa0,0x7d]
+
+v_cmpx_f_i64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xa0,0x7d]
+
+v_cmpx_f_i64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xa0,0x7d]
+
+v_cmpx_f_i64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xa0,0x7d]
+
+v_cmpx_f_i64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xa0,0x7d]
+
+v_cmpx_f_i64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xa1,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_f_u32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x90,0x7d]
+
+v_cmpx_f_u32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x90,0x7d]
+
+v_cmpx_f_u32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x90,0x7d]
+
+v_cmpx_f_u32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x90,0x7d]
+
+v_cmpx_f_u32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x90,0x7d]
+
+v_cmpx_f_u32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x90,0x7d]
+
+v_cmpx_f_u32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x90,0x7d]
+
+v_cmpx_f_u32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x90,0x7d]
+
+v_cmpx_f_u32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x90,0x7d]
+
+v_cmpx_f_u32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x90,0x7d]
+
+v_cmpx_f_u32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x90,0x7d]
+
+v_cmpx_f_u32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x90,0x7d]
+
+v_cmpx_f_u32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x90,0x7d]
+
+v_cmpx_f_u32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x90,0x7d]
+
+v_cmpx_f_u32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x91,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_f_u64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xb0,0x7d]
+
+v_cmpx_f_u64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xb0,0x7d]
+
+v_cmpx_f_u64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xb0,0x7d]
+
+v_cmpx_f_u64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xb0,0x7d]
+
+v_cmpx_f_u64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xb0,0x7d]
+
+v_cmpx_f_u64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xb0,0x7d]
+
+v_cmpx_f_u64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xb0,0x7d]
+
+v_cmpx_f_u64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xb0,0x7d]
+
+v_cmpx_f_u64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xb0,0x7d]
+
+v_cmpx_f_u64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xb0,0x7d]
+
+v_cmpx_f_u64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xb0,0x7d]
+
+v_cmpx_f_u64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xb1,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x0c,0x7d]
+
+v_cmpx_ge_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x0c,0x7d]
+
+v_cmpx_ge_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x0c,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ge_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x2c,0x7d]
+
+v_cmpx_ge_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x2c,0x7d]
+
+v_cmpx_ge_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x2d,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x4c,0x7d]
+
+v_cmpx_ge_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x4c,0x7d]
+
+v_cmpx_ge_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x4d,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_i16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x6c,0x7d]
+
+v_cmpx_ge_i16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x6c,0x7d]
+
+v_cmpx_ge_i16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x6c,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ge_i32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x8c,0x7d]
+
+v_cmpx_ge_i32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x8c,0x7d]
+
+v_cmpx_ge_i32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x8d,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_i64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xac,0x7d]
+
+v_cmpx_ge_i64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xac,0x7d]
+
+v_cmpx_ge_i64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xad,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_u16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x7c,0x7d]
+
+v_cmpx_ge_u16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x7c,0x7d]
+
+v_cmpx_ge_u16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x7c,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ge_u32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x9c,0x7d]
+
+v_cmpx_ge_u32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x9c,0x7d]
+
+v_cmpx_ge_u32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x9d,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_u64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xbc,0x7d]
+
+v_cmpx_ge_u64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xbc,0x7d]
+
+v_cmpx_ge_u64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xbd,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x08,0x7d]
+
+v_cmpx_gt_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x08,0x7d]
+
+v_cmpx_gt_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x08,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_gt_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x28,0x7d]
+
+v_cmpx_gt_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x28,0x7d]
+
+v_cmpx_gt_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x29,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x48,0x7d]
+
+v_cmpx_gt_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x48,0x7d]
+
+v_cmpx_gt_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x49,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_i16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x68,0x7d]
+
+v_cmpx_gt_i16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x68,0x7d]
+
+v_cmpx_gt_i16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x68,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_gt_i32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x88,0x7d]
+
+v_cmpx_gt_i32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x88,0x7d]
+
+v_cmpx_gt_i32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x89,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_i64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xa8,0x7d]
+
+v_cmpx_gt_i64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xa8,0x7d]
+
+v_cmpx_gt_i64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xa9,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_u16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x78,0x7d]
+
+v_cmpx_gt_u16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x78,0x7d]
+
+v_cmpx_gt_u16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x78,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_gt_u32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x98,0x7d]
+
+v_cmpx_gt_u32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x98,0x7d]
+
+v_cmpx_gt_u32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x99,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_u64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xb8,0x7d]
+
+v_cmpx_gt_u64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xb8,0x7d]
+
+v_cmpx_gt_u64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xb9,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x06,0x7d]
+
+v_cmpx_le_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x06,0x7d]
+
+v_cmpx_le_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x06,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_le_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x26,0x7d]
+
+v_cmpx_le_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x26,0x7d]
+
+v_cmpx_le_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x27,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x46,0x7d]
+
+v_cmpx_le_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x46,0x7d]
+
+v_cmpx_le_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x47,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_i16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x66,0x7d]
+
+v_cmpx_le_i16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x66,0x7d]
+
+v_cmpx_le_i16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x66,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_le_i32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x86,0x7d]
+
+v_cmpx_le_i32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x86,0x7d]
+
+v_cmpx_le_i32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x87,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_i64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xa6,0x7d]
+
+v_cmpx_le_i64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xa6,0x7d]
+
+v_cmpx_le_i64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xa7,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_u16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x76,0x7d]
+
+v_cmpx_le_u16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x76,0x7d]
+
+v_cmpx_le_u16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x76,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_le_u32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x96,0x7d]
+
+v_cmpx_le_u32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x96,0x7d]
+
+v_cmpx_le_u32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x97,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_u64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xb6,0x7d]
+
+v_cmpx_le_u64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xb6,0x7d]
+
+v_cmpx_le_u64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xb7,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lg_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x0a,0x7d]
+
+v_cmpx_lg_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x0a,0x7d]
+
+v_cmpx_lg_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x0a,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lg_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x2a,0x7d]
+
+v_cmpx_lg_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x2a,0x7d]
+
+v_cmpx_lg_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x2b,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lg_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x4a,0x7d]
+
+v_cmpx_lg_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x4a,0x7d]
+
+v_cmpx_lg_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x4b,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x02,0x7d]
+
+v_cmpx_lt_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x02,0x7d]
+
+v_cmpx_lt_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lt_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x22,0x7d]
+
+v_cmpx_lt_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x22,0x7d]
+
+v_cmpx_lt_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x23,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x42,0x7d]
+
+v_cmpx_lt_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x42,0x7d]
+
+v_cmpx_lt_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x43,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_i16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x62,0x7d]
+
+v_cmpx_lt_i16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x62,0x7d]
+
+v_cmpx_lt_i16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x62,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lt_i32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x82,0x7d]
+
+v_cmpx_lt_i32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x82,0x7d]
+
+v_cmpx_lt_i32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x83,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_i64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xa2,0x7d]
+
+v_cmpx_lt_i64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xa2,0x7d]
+
+v_cmpx_lt_i64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xa3,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_u16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x72,0x7d]
+
+v_cmpx_lt_u16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x72,0x7d]
+
+v_cmpx_lt_u16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x72,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lt_u32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x92,0x7d]
+
+v_cmpx_lt_u32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x92,0x7d]
+
+v_cmpx_lt_u32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x93,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_u64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xb2,0x7d]
+
+v_cmpx_lt_u64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xb2,0x7d]
+
+v_cmpx_lt_u64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xb3,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_i16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x6a,0x7d]
+
+v_cmpx_ne_i16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x6a,0x7d]
+
+v_cmpx_ne_i16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x6a,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ne_i32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x8a,0x7d]
+
+v_cmpx_ne_i32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x8a,0x7d]
+
+v_cmpx_ne_i32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x8b,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_i64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xaa,0x7d]
+
+v_cmpx_ne_i64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xaa,0x7d]
+
+v_cmpx_ne_i64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xab,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_u16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x7a,0x7d]
+
+v_cmpx_ne_u16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x7a,0x7d]
+
+v_cmpx_ne_u16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x7a,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ne_u32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x9a,0x7d]
+
+v_cmpx_ne_u32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x9a,0x7d]
+
+v_cmpx_ne_u32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x9b,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_u64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xba,0x7d]
+
+v_cmpx_ne_u64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xba,0x7d]
+
+v_cmpx_ne_u64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xbb,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_neq_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x1a,0x7d]
+
+v_cmpx_neq_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x1a,0x7d]
+
+v_cmpx_neq_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x1a,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_neq_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x3a,0x7d]
+
+v_cmpx_neq_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x3a,0x7d]
+
+v_cmpx_neq_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x3b,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_neq_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x5a,0x7d]
+
+v_cmpx_neq_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x5a,0x7d]
+
+v_cmpx_neq_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x5b,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nge_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x12,0x7d]
+
+v_cmpx_nge_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x12,0x7d]
+
+v_cmpx_nge_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x12,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nge_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x32,0x7d]
+
+v_cmpx_nge_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x32,0x7d]
+
+v_cmpx_nge_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x33,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nge_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x52,0x7d]
+
+v_cmpx_nge_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x52,0x7d]
+
+v_cmpx_nge_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x53,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ngt_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x16,0x7d]
+
+v_cmpx_ngt_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x16,0x7d]
+
+v_cmpx_ngt_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x16,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ngt_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x36,0x7d]
+
+v_cmpx_ngt_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x36,0x7d]
+
+v_cmpx_ngt_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x37,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ngt_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x56,0x7d]
+
+v_cmpx_ngt_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x56,0x7d]
+
+v_cmpx_ngt_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x57,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nle_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x18,0x7d]
+
+v_cmpx_nle_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x18,0x7d]
+
+v_cmpx_nle_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x18,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nle_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x38,0x7d]
+
+v_cmpx_nle_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x38,0x7d]
+
+v_cmpx_nle_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x39,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nle_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x58,0x7d]
+
+v_cmpx_nle_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x58,0x7d]
+
+v_cmpx_nle_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x59,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlg_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x14,0x7d]
+
+v_cmpx_nlg_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x14,0x7d]
+
+v_cmpx_nlg_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x14,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nlg_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x34,0x7d]
+
+v_cmpx_nlg_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x34,0x7d]
+
+v_cmpx_nlg_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x35,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlg_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x54,0x7d]
+
+v_cmpx_nlg_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x54,0x7d]
+
+v_cmpx_nlg_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x55,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlt_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x1c,0x7d]
+
+v_cmpx_nlt_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x1c,0x7d]
+
+v_cmpx_nlt_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x1c,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nlt_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x3c,0x7d]
+
+v_cmpx_nlt_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x3c,0x7d]
+
+v_cmpx_nlt_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x3d,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlt_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x5c,0x7d]
+
+v_cmpx_nlt_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x5c,0x7d]
+
+v_cmpx_nlt_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x5d,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_o_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x0e,0x7d]
+
+v_cmpx_o_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x0e,0x7d]
+
+v_cmpx_o_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x0e,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_o_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x2e,0x7d]
+
+v_cmpx_o_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x2e,0x7d]
+
+v_cmpx_o_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x2f,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_o_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x4e,0x7d]
+
+v_cmpx_o_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x4e,0x7d]
+
+v_cmpx_o_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x4f,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_t_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x1e,0x7d]
+
+v_cmpx_t_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x1e,0x7d]
+
+v_cmpx_t_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x1e,0x7d]
+
+v_cmpx_t_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x1e,0x7d]
+
+v_cmpx_t_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x1e,0x7d]
+
+v_cmpx_t_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x1e,0x7d]
+
+v_cmpx_t_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x1e,0x7d]
+
+v_cmpx_t_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x1e,0x7d]
+
+v_cmpx_t_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x1e,0x7d]
+
+v_cmpx_t_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x1e,0x7d]
+
+v_cmpx_t_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x1e,0x7d]
+
+v_cmpx_t_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x1e,0x7d]
+
+v_cmpx_t_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x1e,0x7d]
+
+v_cmpx_t_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x1e,0x7d]
+
+v_cmpx_t_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x1e,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_t_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x3e,0x7d]
+
+v_cmpx_t_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x3e,0x7d]
+
+v_cmpx_t_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x3e,0x7d]
+
+v_cmpx_t_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x3e,0x7d]
+
+v_cmpx_t_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x3e,0x7d]
+
+v_cmpx_t_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x3e,0x7d]
+
+v_cmpx_t_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x3e,0x7d]
+
+v_cmpx_t_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x3e,0x7d]
+
+v_cmpx_t_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x3e,0x7d]
+
+v_cmpx_t_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x3e,0x7d]
+
+v_cmpx_t_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x3e,0x7d]
+
+v_cmpx_t_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x3e,0x7d]
+
+v_cmpx_t_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x3e,0x7d]
+
+v_cmpx_t_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x3e,0x7d]
+
+v_cmpx_t_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x3f,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_t_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x5e,0x7d]
+
+v_cmpx_t_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x5e,0x7d]
+
+v_cmpx_t_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x5e,0x7d]
+
+v_cmpx_t_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x5e,0x7d]
+
+v_cmpx_t_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x5e,0x7d]
+
+v_cmpx_t_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x5e,0x7d]
+
+v_cmpx_t_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x5e,0x7d]
+
+v_cmpx_t_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x5e,0x7d]
+
+v_cmpx_t_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x5e,0x7d]
+
+v_cmpx_t_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x5e,0x7d]
+
+v_cmpx_t_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x5e,0x7d]
+
+v_cmpx_t_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x5f,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_t_i32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x8e,0x7d]
+
+v_cmpx_t_i32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x8e,0x7d]
+
+v_cmpx_t_i32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x8e,0x7d]
+
+v_cmpx_t_i32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x8e,0x7d]
+
+v_cmpx_t_i32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x8e,0x7d]
+
+v_cmpx_t_i32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x8e,0x7d]
+
+v_cmpx_t_i32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x8e,0x7d]
+
+v_cmpx_t_i32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x8e,0x7d]
+
+v_cmpx_t_i32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x8e,0x7d]
+
+v_cmpx_t_i32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x8e,0x7d]
+
+v_cmpx_t_i32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x8e,0x7d]
+
+v_cmpx_t_i32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x8e,0x7d]
+
+v_cmpx_t_i32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x8e,0x7d]
+
+v_cmpx_t_i32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x8e,0x7d]
+
+v_cmpx_t_i32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x8f,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_t_i64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xae,0x7d]
+
+v_cmpx_t_i64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xae,0x7d]
+
+v_cmpx_t_i64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xae,0x7d]
+
+v_cmpx_t_i64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xae,0x7d]
+
+v_cmpx_t_i64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xae,0x7d]
+
+v_cmpx_t_i64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xae,0x7d]
+
+v_cmpx_t_i64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xae,0x7d]
+
+v_cmpx_t_i64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xae,0x7d]
+
+v_cmpx_t_i64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xae,0x7d]
+
+v_cmpx_t_i64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xae,0x7d]
+
+v_cmpx_t_i64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xae,0x7d]
+
+v_cmpx_t_i64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xaf,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_t_u32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x9e,0x7d]
+
+v_cmpx_t_u32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x9e,0x7d]
+
+v_cmpx_t_u32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x9e,0x7d]
+
+v_cmpx_t_u32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x9e,0x7d]
+
+v_cmpx_t_u32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x9e,0x7d]
+
+v_cmpx_t_u32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x9e,0x7d]
+
+v_cmpx_t_u32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x9e,0x7d]
+
+v_cmpx_t_u32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x9e,0x7d]
+
+v_cmpx_t_u32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x9e,0x7d]
+
+v_cmpx_t_u32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x9e,0x7d]
+
+v_cmpx_t_u32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x9e,0x7d]
+
+v_cmpx_t_u32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x9e,0x7d]
+
+v_cmpx_t_u32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x9e,0x7d]
+
+v_cmpx_t_u32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x9e,0x7d]
+
+v_cmpx_t_u32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x9f,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_t_u64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0xbe,0x7d]
+
+v_cmpx_t_u64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0xbe,0x7d]
+
+v_cmpx_t_u64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0xbe,0x7d]
+
+v_cmpx_t_u64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0xbe,0x7d]
+
+v_cmpx_t_u64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0xbe,0x7d]
+
+v_cmpx_t_u64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0xbe,0x7d]
+
+v_cmpx_t_u64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0xbe,0x7d]
+
+v_cmpx_t_u64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0xbe,0x7d]
+
+v_cmpx_t_u64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0xbe,0x7d]
+
+v_cmpx_t_u64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0xbe,0x7d]
+
+v_cmpx_t_u64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0xbe,0x7d]
+
+v_cmpx_t_u64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0xbf,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_tru_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x1e,0x7d]
+
+v_cmpx_tru_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x1e,0x7d]
+
+v_cmpx_tru_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x1e,0x7d]
+
+v_cmpx_tru_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x1e,0x7d]
+
+v_cmpx_tru_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x1e,0x7d]
+
+v_cmpx_tru_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x1e,0x7d]
+
+v_cmpx_tru_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x1e,0x7d]
+
+v_cmpx_tru_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x1e,0x7d]
+
+v_cmpx_tru_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x1e,0x7d]
+
+v_cmpx_tru_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x1e,0x7d]
+
+v_cmpx_tru_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x1e,0x7d]
+
+v_cmpx_tru_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x1e,0x7d]
+
+v_cmpx_tru_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x1e,0x7d]
+
+v_cmpx_tru_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x1e,0x7d]
+
+v_cmpx_tru_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x1e,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_tru_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x3e,0x7d]
+
+v_cmpx_tru_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x3e,0x7d]
+
+v_cmpx_tru_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x3e,0x7d]
+
+v_cmpx_tru_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x3e,0x7d]
+
+v_cmpx_tru_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x3e,0x7d]
+
+v_cmpx_tru_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x3e,0x7d]
+
+v_cmpx_tru_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x3e,0x7d]
+
+v_cmpx_tru_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x3e,0x7d]
+
+v_cmpx_tru_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x3e,0x7d]
+
+v_cmpx_tru_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x3e,0x7d]
+
+v_cmpx_tru_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x3e,0x7d]
+
+v_cmpx_tru_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x3e,0x7d]
+
+v_cmpx_tru_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x3e,0x7d]
+
+v_cmpx_tru_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x3e,0x7d]
+
+v_cmpx_tru_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x3f,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_tru_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x5e,0x7d]
+
+v_cmpx_tru_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x5e,0x7d]
+
+v_cmpx_tru_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x5e,0x7d]
+
+v_cmpx_tru_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x5e,0x7d]
+
+v_cmpx_tru_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x5e,0x7d]
+
+v_cmpx_tru_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x5e,0x7d]
+
+v_cmpx_tru_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x5e,0x7d]
+
+v_cmpx_tru_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x5e,0x7d]
+
+v_cmpx_tru_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x5e,0x7d]
+
+v_cmpx_tru_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x5e,0x7d]
+
+v_cmpx_tru_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x5e,0x7d]
+
+v_cmpx_tru_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x5f,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_u_f16 v1, v2
+// GFX11: encoding: [0x01,0x05,0x10,0x7d]
+
+v_cmpx_u_f16 v127, v2
+// GFX11: encoding: [0x7f,0x05,0x10,0x7d]
+
+v_cmpx_u_f16 s1, v2
+// GFX11: encoding: [0x01,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 s105, v2
+// GFX11: encoding: [0x69,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 null, v2
+// GFX11: encoding: [0x7c,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 0xfe0b, v127
+// GFX11: encoding: [0xff,0xfe,0x10,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_u_f32 v1, v2
+// GFX11: encoding: [0x01,0x05,0x30,0x7d]
+
+v_cmpx_u_f32 v255, v2
+// GFX11: encoding: [0xff,0x05,0x30,0x7d]
+
+v_cmpx_u_f32 s1, v2
+// GFX11: encoding: [0x01,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 s105, v2
+// GFX11: encoding: [0x69,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 vcc_lo, v2
+// GFX11: encoding: [0x6a,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 vcc_hi, v2
+// GFX11: encoding: [0x6b,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 ttmp15, v2
+// GFX11: encoding: [0x7b,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 m0, v2
+// GFX11: encoding: [0x7d,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 exec_lo, v2
+// GFX11: encoding: [0x7e,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 exec_hi, v2
+// GFX11: encoding: [0x7f,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 null, v2
+// GFX11: encoding: [0x7c,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 -1, v2
+// GFX11: encoding: [0xc1,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 0.5, v2
+// GFX11: encoding: [0xf0,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 src_scc, v2
+// GFX11: encoding: [0xfd,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 0xaf123456, v255
+// GFX11: encoding: [0xff,0xfe,0x31,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_u_f64 v[1:2], v[2:3]
+// GFX11: encoding: [0x01,0x05,0x50,0x7d]
+
+v_cmpx_u_f64 v[254:255], v[2:3]
+// GFX11: encoding: [0xfe,0x05,0x50,0x7d]
+
+v_cmpx_u_f64 s[2:3], v[2:3]
+// GFX11: encoding: [0x02,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 s[104:105], v[2:3]
+// GFX11: encoding: [0x68,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 vcc, v[2:3]
+// GFX11: encoding: [0x6a,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 ttmp[14:15], v[2:3]
+// GFX11: encoding: [0x7a,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 exec, v[2:3]
+// GFX11: encoding: [0x7e,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 null, v[2:3]
+// GFX11: encoding: [0x7c,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 -1, v[2:3]
+// GFX11: encoding: [0xc1,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 0.5, v[2:3]
+// GFX11: encoding: [0xf0,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 src_scc, v[2:3]
+// GFX11: encoding: [0xfd,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 0xaf123456, v[254:255]
+// GFX11: encoding: [0xff,0xfc,0x51,0x7d,0x56,0x34,0x12,0xaf]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s
index e90bb80f098cb0f..17e60c08f6a77ea 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
 
 v_cmpx_class_f16_e32 v1, v2
 // GFX11: encoding: [0x01,0x05,0xfa,0x7d]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16-fake16.s
new file mode 100644
index 000000000000000..897eefe1fcbe22d
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16-fake16.s
@@ -0,0 +1,2690 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+
+v_cmpx_class_f16_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_class_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_class_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_class_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_class_f16 -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0xfa,0x7d,0x7f,0x6f,0x35,0x30]
+
+v_cmpx_class_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_class_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_class_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_class_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_class_f32 -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0xfd,0x7d,0xff,0x6f,0x35,0x30]
+
+v_cmpx_eq_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x04,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_eq_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x25,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_eq_i16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_i16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x64,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_eq_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x85,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_eq_u16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_u16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x74,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_eq_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x95,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_f_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_f_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_f_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_f_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_f_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_f_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_f_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_f_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_f_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_f_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_f_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_f_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_f_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x00,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_f_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x00,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_f_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_f_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_f_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_f_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_f_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_f_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_f_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_f_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_f_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_f_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_f_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_f_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_f_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x20,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_f_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x21,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_f_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_f_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_f_i32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_f_i32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_f_i32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_f_i32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_f_i32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_f_i32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_f_i32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_f_i32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_f_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_f_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_f_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x80,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_f_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x81,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_f_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_f_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_f_u32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_f_u32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_f_u32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_f_u32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_f_u32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_f_u32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_f_u32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_f_u32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_f_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_f_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_f_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x90,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_f_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x91,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ge_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x0c,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_ge_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x2d,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_ge_i16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_i16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x6c,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_ge_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x8d,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ge_u16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_u16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x7c,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_ge_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x9d,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_gt_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x08,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_gt_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x29,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_gt_i16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_i16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x68,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_gt_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x89,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_gt_u16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_u16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x78,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_gt_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x99,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_le_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x06,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_le_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x27,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_le_i16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_i16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_i16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x66,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_le_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_i32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x87,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_le_u16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_u16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_u16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x76,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_le_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_u32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x97,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lg_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lg_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lg_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lg_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x0a,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_lg_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lg_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lg_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x2b,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_lt_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_lt_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x23,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_lt_i16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_i16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x62,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_lt_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x83,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lt_u16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_u16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x72,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_lt_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x93,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ne_i16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_i16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ne_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ne_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x6a,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_ne_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ne_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ne_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x8b,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ne_u16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_u16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ne_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ne_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x7a,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_ne_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ne_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ne_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x9b,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_neq_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_neq_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_neq_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_neq_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x1a,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_neq_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_neq_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_neq_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_neq_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x3b,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_nge_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nge_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nge_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nge_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x12,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_nge_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nge_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nge_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nge_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x33,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_ngt_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ngt_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ngt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ngt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x16,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_ngt_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ngt_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ngt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ngt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x37,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_nle_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nle_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nle_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nle_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x18,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_nle_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nle_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nle_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nle_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x39,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_nlg_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlg_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nlg_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nlg_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x14,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_nlg_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlg_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nlg_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nlg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x35,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_nlt_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlt_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nlt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nlt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x1c,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_nlt_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlt_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nlt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nlt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x3d,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_o_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_o_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_o_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_o_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_o_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x0e,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_o_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_o_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_o_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_o_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_o_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x2f,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_t_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_t_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_t_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_t_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_t_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_t_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_t_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_t_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_t_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_t_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_t_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_t_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_t_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_t_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x1e,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_t_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_t_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_t_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_t_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_t_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_t_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_t_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_t_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_t_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_t_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_t_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_t_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_t_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_t_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x3f,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_t_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_t_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_t_i32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_t_i32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_t_i32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_t_i32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_t_i32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_t_i32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_t_i32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_t_i32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_t_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_t_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_t_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x8e,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_t_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x8f,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_t_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_t_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_t_u32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_t_u32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_t_u32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_t_u32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_t_u32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_t_u32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_t_u32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_t_u32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_t_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_t_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_t_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x9e,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_t_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x9f,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_tru_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_tru_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_tru_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_tru_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_tru_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_tru_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_tru_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_tru_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_tru_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_tru_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_tru_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_tru_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_tru_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x1e,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_tru_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x1e,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_tru_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_tru_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_tru_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_tru_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_tru_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_tru_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_tru_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_tru_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_tru_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_tru_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_tru_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_tru_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_tru_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x3e,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_tru_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x3f,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_u_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_u_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_u_f16 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_u_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_u_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x10,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_u_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_u_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_u_f32 v1, v2 row_mirror
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_half_mirror
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_shl:1
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_shl:15
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_shr:1
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_shr:15
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_ror:1
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_ror:15
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_u_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_u_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: encoding: [0xfa,0xfe,0x31,0x7d,0xff,0x6f,0xf5,0x30]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s
index d8fc1d3e2b3cd9b..e46661df84a1554 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp16.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
 
 v_cmpx_class_f16_dpp v1, v2 quad_perm:[3,2,1,0]
 // GFX11: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8-fake16.s
new file mode 100644
index 000000000000000..e66da32fe0329a8
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8-fake16.s
@@ -0,0 +1,578 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+
+v_cmpx_class_f16_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0xfa,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_class_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0xfc,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0xfc,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0xfd,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_eq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x04,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x04,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x04,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_eq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x24,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x24,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x25,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_eq_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x64,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x64,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x64,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_eq_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x84,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x84,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x85,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_eq_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x74,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x74,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x74,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_eq_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x94,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x94,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x95,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_f_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x00,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_f_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x00,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_f_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x00,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_f_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x20,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_f_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x20,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_f_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x21,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_f_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x80,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_f_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x80,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_f_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x81,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_f_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x90,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_f_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x90,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_f_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x91,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_ge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x0c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x0c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x0c,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_ge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x2c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x2c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x2d,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_ge_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x6c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x6c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x6c,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_ge_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x8c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x8c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x8d,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_ge_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x7c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x7c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x7c,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_ge_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x9c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x9c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x9d,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_gt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x08,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x08,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x08,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_gt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x28,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x28,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x29,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_gt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x68,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x68,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x68,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_gt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x88,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x88,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x89,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_gt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x78,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x78,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x78,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_gt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x98,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x98,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x99,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_le_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x06,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x06,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x06,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_le_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x26,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x26,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x27,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_le_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x66,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x66,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x66,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_le_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x86,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x86,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x87,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_le_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x76,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x76,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x76,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_le_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x96,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x96,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x97,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_lg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x0a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x0a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x0a,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_lg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x2a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x2a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x2b,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x02,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x22,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x22,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x23,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_lt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x62,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x62,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x62,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_lt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x82,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x82,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x83,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_lt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x72,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x72,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x72,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_lt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x92,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x92,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x93,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_ne_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x6a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x6a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x6a,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_ne_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x8a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x8a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x8b,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_ne_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x7a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x7a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x7a,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_ne_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x9a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x9a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x9b,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_neq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x1a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x1a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x1a,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_neq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x3a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x3a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x3b,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_nge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x12,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x12,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x12,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_nge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x32,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x32,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x33,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_ngt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x16,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x16,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x16,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_ngt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x36,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x36,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x37,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_nle_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x18,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x18,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x18,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_nle_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x38,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x38,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x39,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_nlg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x14,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x14,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x14,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_nlg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x34,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x34,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x35,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_nlt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x1c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x1c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x1c,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_nlt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x3c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x3c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x3d,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_o_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x0e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x0e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x0e,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_o_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x2e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x2e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x2f,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_t_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x1e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_t_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x1e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_t_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x1e,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_t_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x3e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_t_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x3e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_t_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x3f,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_t_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x8e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_t_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x8e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_t_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x8f,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_t_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x9e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_t_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x9e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_t_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x9f,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_tru_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x1e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_tru_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x1e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_tru_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x1e,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_tru_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x3e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_tru_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x3e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_tru_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x3f,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_u_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x10,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x10,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x10,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_u_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: encoding: [0xe9,0x04,0x30,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: encoding: [0xea,0x04,0x30,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: encoding: [0xe9,0xfe,0x31,0x7d,0xff,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s
index 9db7e48809ee154..5062f901d2aa3a0 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_dpp8.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11 %s
 
 v_cmpx_class_f16_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s
index c37d15b7abc79bd..ec628dd94f366ba 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_err.s
@@ -1,542 +1,542 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
 
 v_cmpx_class_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_eq_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_class_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmpx_eq_i16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_class_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmpx_eq_u16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_class_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_f_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_class_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmpx_ge_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_class_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmpx_ge_i16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ge_u16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_i16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_gt_u16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_i16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_i16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_le_u16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lg_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_i16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_i16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_lt_i16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_u16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_i16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ne_i16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_u16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ne_u16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_neq_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_u16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nge_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_u16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ngt_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nle_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_u16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nlg_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_f_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_nlt_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_f_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_o_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_f_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_t_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_f_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_tru_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_f_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_u_f16_e32 v1, v255
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_f_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_class_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_eq_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_i16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_u16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_f_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_i16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_i16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ge_u16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_i16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_i16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_i16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_gt_u16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_i16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_i16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_u16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_le_u16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lg_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_u16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_u16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_lt_i16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_u16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_u16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ne_i16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ne_u16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_neq_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nge_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ngt_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nle_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nlg_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_i16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_nlt_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_o_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_i16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_t_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_i16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_tru_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_u_f16_e32 v255, v2
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_i16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_class_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_u16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_eq_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_i16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_u16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_u16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_u16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_f_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_u16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_i16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ge_u16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_i16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_gt_u16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_i16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_i16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_le_u16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lg_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_i16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_i16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_lt_i16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_u16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_i16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ne_i16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_u16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ne_u16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_neq_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_u16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nge_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_u16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ngt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nle_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_u16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nlg_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lg_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_nlt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_o_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lg_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_t_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lg_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_tru_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_u_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lg_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_class_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_eq_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_i16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_u16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_f_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_i16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_i16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ge_u16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_i16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_i16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_i16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_gt_u16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_i16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_i16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_u16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_le_u16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lg_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_u16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_u16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_lt_i16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
 v_cmpx_lt_u16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ne_i16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmpx_ne_u16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_i16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_neq_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nge_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_i16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ngt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_i16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_nle_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nlg_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_i16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nlt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_u16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_o_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_t_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_u16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_tru_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_u16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_u_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_class_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_u16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_neq_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_eq_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_neq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_eq_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_neq_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_f_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_neq_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_neq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_ge_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_neq_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_ge_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nge_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_gt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_gt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nge_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_gt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nge_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_le_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_le_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nge_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_le_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ngt_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_lg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ngt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_lt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ngt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_lt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ngt_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_lt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ngt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_ne_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ngt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_ne_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nle_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_neq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nle_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_nge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nle_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_ngt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nle_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_nle_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nle_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_nlg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nle_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_nlt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlg_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_o_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_t_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlg_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_tru_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlg_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_u_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_class_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlg_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_eq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlt_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_eq_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_eq_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_f_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlt_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_ge_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_ge_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_o_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_gt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_o_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_gt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_o_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_gt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_o_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_le_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_o_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_le_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_o_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_le_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_t_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmpx_lg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_t_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmpx_lt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_t_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:22: error: invalid operand for instruction
 
-v_cmpx_lt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_t_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:18: error: invalid operand for instruction
 
-v_cmpx_lt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_t_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:18: error: invalid operand for instruction
 
-v_cmpx_ne_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_t_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:18: error: invalid operand for instruction
 
-v_cmpx_ne_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_tru_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmpx_neq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_tru_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmpx_nge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_tru_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:24: error: invalid operand for instruction
 
-v_cmpx_ngt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_tru_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:20: error: invalid operand for instruction
 
-v_cmpx_nle_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_tru_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:20: error: invalid operand for instruction
 
-v_cmpx_nlg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_tru_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:20: error: invalid operand for instruction
 
-v_cmpx_nlt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_u_f16_e32 v1, v255
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_o_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_u_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_t_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_u_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_tru_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_u_f16_e32 v255, v2
+// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmpx_u_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
 
+v_cmpx_u_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: :[[@LINE-1]]:27: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s
index a279bd381848e82..3bbdf3d3a903f87 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopcx_t16_promote.s
@@ -1,542 +1,542 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 %s
 
 v_cmpx_class_f16 v1, v255
-// GFX11: v_cmpx_class_f16_e64
+// GFX11: v_cmpx_class_f16_e64 v1, v255           ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_eq_f16 v1, v255
-// GFX11: v_cmpx_eq_f16_e64
+v_cmpx_class_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_class_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_eq_i16 v1, v255
-// GFX11: v_cmpx_eq_i16_e64
+v_cmpx_class_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_class_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_eq_u16 v1, v255
-// GFX11: v_cmpx_eq_u16_e64
+v_cmpx_class_f16 v255, v2
+// GFX11: v_cmpx_class_f16_e64 v255, v2           ; encoding: [0x7e,0x00,0xfd,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_f_f16 v1, v255
-// GFX11: v_cmpx_f_f16_e64
+v_cmpx_class_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_class_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_ge_f16 v1, v255
-// GFX11: v_cmpx_ge_f16_e64
+v_cmpx_class_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_class_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ge_i16 v1, v255
-// GFX11: v_cmpx_ge_i16_e64
+v_cmpx_eq_f16 v1, v255
+// GFX11: v_cmpx_eq_f16_e64 v1, v255              ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ge_u16 v1, v255
-// GFX11: v_cmpx_ge_u16_e64
+v_cmpx_eq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_eq_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x82,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_gt_f16 v1, v255
-// GFX11: v_cmpx_gt_f16_e64
+v_cmpx_eq_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_eq_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_gt_i16 v1, v255
-// GFX11: v_cmpx_gt_i16_e64
+v_cmpx_eq_f16 v255, v2
+// GFX11: v_cmpx_eq_f16_e64 v255, v2              ; encoding: [0x7e,0x00,0x82,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_gt_u16 v1, v255
-// GFX11: v_cmpx_gt_u16_e64
+v_cmpx_eq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_eq_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x82,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_le_f16 v1, v255
-// GFX11: v_cmpx_le_f16_e64
+v_cmpx_eq_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_eq_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_le_i16 v1, v255
-// GFX11: v_cmpx_le_i16_e64
+v_cmpx_eq_i16 v1, v255
+// GFX11: v_cmpx_eq_i16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_le_u16 v1, v255
-// GFX11: v_cmpx_le_u16_e64
+v_cmpx_eq_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_eq_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lg_f16 v1, v255
-// GFX11: v_cmpx_lg_f16_e64
+v_cmpx_eq_i16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_eq_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_lt_f16 v1, v255
-// GFX11: v_cmpx_lt_f16_e64
+v_cmpx_eq_i16 v255, v2
+// GFX11: v_cmpx_eq_i16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb2,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_lt_i16 v1, v255
-// GFX11: v_cmpx_lt_i16_e64
+v_cmpx_eq_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_eq_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_lt_u16 v1, v255
-// GFX11: v_cmpx_lt_u16_e64
+v_cmpx_eq_i16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_eq_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ne_i16 v1, v255
-// GFX11: v_cmpx_ne_i16_e64
+v_cmpx_eq_u16 v1, v255
+// GFX11: v_cmpx_eq_u16_e64 v1, v255              ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ne_u16 v1, v255
-// GFX11: v_cmpx_ne_u16_e64
+v_cmpx_eq_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_eq_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_neq_f16 v1, v255
-// GFX11: v_cmpx_neq_f16_e64
+v_cmpx_eq_u16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_eq_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_nge_f16 v1, v255
-// GFX11: v_cmpx_nge_f16_e64
+v_cmpx_eq_u16 v255, v2
+// GFX11: v_cmpx_eq_u16_e64 v255, v2              ; encoding: [0x7e,0x00,0xba,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ngt_f16 v1, v255
-// GFX11: v_cmpx_ngt_f16_e64
+v_cmpx_eq_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_eq_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_nle_f16 v1, v255
-// GFX11: v_cmpx_nle_f16_e64
+v_cmpx_eq_u16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_eq_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_nlg_f16 v1, v255
-// GFX11: v_cmpx_nlg_f16_e64
+v_cmpx_f_f16 v1, v255
+// GFX11: v_cmpx_f_f16_e64 v1, v255               ; encoding: [0x7e,0x00,0x80,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_nlt_f16 v1, v255
-// GFX11: v_cmpx_nlt_f16_e64
+v_cmpx_f_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_f_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x80,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_o_f16 v1, v255
-// GFX11: v_cmpx_o_f16_e64
+v_cmpx_f_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_f_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_t_f16 v1, v255
-// GFX11: v_cmpx_t_f16_e64
+v_cmpx_f_f16 v255, v2
+// GFX11: v_cmpx_f_f16_e64 v255, v2               ; encoding: [0x7e,0x00,0x80,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_tru_f16 v1, v255
-// GFX11: v_cmpx_t_f16_e64
+v_cmpx_f_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_f_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x80,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_u_f16 v1, v255
-// GFX11: v_cmpx_u_f16_e64
+v_cmpx_f_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_f_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x80,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_class_f16 v255, v2
-// GFX11: v_cmpx_class_f16_e64
+v_cmpx_ge_f16 v1, v255
+// GFX11: v_cmpx_ge_f16_e64 v1, v255              ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_eq_f16 v255, v2
-// GFX11: v_cmpx_eq_f16_e64
+v_cmpx_ge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_ge_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_eq_i16 v255, v2
-// GFX11: v_cmpx_eq_i16_e64
+v_cmpx_ge_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ge_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_eq_u16 v255, v2
-// GFX11: v_cmpx_eq_u16_e64
+v_cmpx_ge_f16 v255, v2
+// GFX11: v_cmpx_ge_f16_e64 v255, v2              ; encoding: [0x7e,0x00,0x86,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_f_f16 v255, v2
-// GFX11: v_cmpx_f_f16_e64
+v_cmpx_ge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_ge_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_ge_f16 v255, v2
-// GFX11: v_cmpx_ge_f16_e64
+v_cmpx_ge_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ge_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ge_i16 v255, v2
-// GFX11: v_cmpx_ge_i16_e64
+v_cmpx_ge_i16 v1, v255
+// GFX11: v_cmpx_ge_i16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ge_u16 v255, v2
-// GFX11: v_cmpx_ge_u16_e64
+v_cmpx_ge_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_ge_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_gt_f16 v255, v2
-// GFX11: v_cmpx_gt_f16_e64
+v_cmpx_ge_i16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ge_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_gt_i16 v255, v2
-// GFX11: v_cmpx_gt_i16_e64
+v_cmpx_ge_i16 v255, v2
+// GFX11: v_cmpx_ge_i16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb6,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_gt_u16 v255, v2
-// GFX11: v_cmpx_gt_u16_e64
+v_cmpx_ge_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_ge_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_le_f16 v255, v2
-// GFX11: v_cmpx_le_f16_e64
+v_cmpx_ge_i16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ge_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_le_i16 v255, v2
-// GFX11: v_cmpx_le_i16_e64
+v_cmpx_ge_u16 v1, v255
+// GFX11: v_cmpx_ge_u16_e64 v1, v255              ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_le_u16 v255, v2
-// GFX11: v_cmpx_le_u16_e64
+v_cmpx_ge_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_ge_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lg_f16 v255, v2
-// GFX11: v_cmpx_lg_f16_e64
+v_cmpx_ge_u16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ge_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_lt_f16 v255, v2
-// GFX11: v_cmpx_lt_f16_e64
+v_cmpx_ge_u16 v255, v2
+// GFX11: v_cmpx_ge_u16_e64 v255, v2              ; encoding: [0x7e,0x00,0xbe,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_lt_i16 v255, v2
-// GFX11: v_cmpx_lt_i16_e64
+v_cmpx_ge_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_ge_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_lt_u16 v255, v2
-// GFX11: v_cmpx_lt_u16_e64
+v_cmpx_ge_u16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ge_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ne_i16 v255, v2
-// GFX11: v_cmpx_ne_i16_e64
+v_cmpx_gt_f16 v1, v255
+// GFX11: v_cmpx_gt_f16_e64 v1, v255              ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ne_u16 v255, v2
-// GFX11: v_cmpx_ne_u16_e64
+v_cmpx_gt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_gt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x84,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_neq_f16 v255, v2
-// GFX11: v_cmpx_neq_f16_e64
+v_cmpx_gt_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_gt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_nge_f16 v255, v2
-// GFX11: v_cmpx_nge_f16_e64
+v_cmpx_gt_f16 v255, v2
+// GFX11: v_cmpx_gt_f16_e64 v255, v2              ; encoding: [0x7e,0x00,0x84,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ngt_f16 v255, v2
-// GFX11: v_cmpx_ngt_f16_e64
+v_cmpx_gt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_gt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x84,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_nle_f16 v255, v2
-// GFX11: v_cmpx_nle_f16_e64
+v_cmpx_gt_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_gt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_nlg_f16 v255, v2
-// GFX11: v_cmpx_nlg_f16_e64
+v_cmpx_gt_i16 v1, v255
+// GFX11: v_cmpx_gt_i16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_nlt_f16 v255, v2
-// GFX11: v_cmpx_nlt_f16_e64
+v_cmpx_gt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_gt_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_o_f16 v255, v2
-// GFX11: v_cmpx_o_f16_e64
+v_cmpx_gt_i16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_gt_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_t_f16 v255, v2
-// GFX11: v_cmpx_t_f16_e64
+v_cmpx_gt_i16 v255, v2
+// GFX11: v_cmpx_gt_i16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb4,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_tru_f16 v255, v2
-// GFX11: v_cmpx_t_f16_e64
+v_cmpx_gt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_gt_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_u_f16 v255, v2
-// GFX11: v_cmpx_u_f16_e64
+v_cmpx_gt_i16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_gt_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_class_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_class_f16_e64
+v_cmpx_gt_u16 v1, v255
+// GFX11: v_cmpx_gt_u16_e64 v1, v255              ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_eq_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_eq_f16_e64
+v_cmpx_gt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_gt_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_eq_i16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_eq_i16_e64
+v_cmpx_gt_u16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_gt_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_eq_u16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_eq_u16_e64
+v_cmpx_gt_u16 v255, v2
+// GFX11: v_cmpx_gt_u16_e64 v255, v2              ; encoding: [0x7e,0x00,0xbc,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_f_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_f_f16_e64
+v_cmpx_gt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_gt_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_ge_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ge_f16_e64
+v_cmpx_gt_u16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_gt_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ge_i16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ge_i16_e64
+v_cmpx_le_f16 v1, v255
+// GFX11: v_cmpx_le_f16_e64 v1, v255              ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ge_u16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ge_u16_e64
+v_cmpx_le_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_le_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x83,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_gt_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_gt_f16_e64
+v_cmpx_le_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_le_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_gt_i16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_gt_i16_e64
+v_cmpx_le_f16 v255, v2
+// GFX11: v_cmpx_le_f16_e64 v255, v2              ; encoding: [0x7e,0x00,0x83,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_gt_u16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_gt_u16_e64
+v_cmpx_le_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_le_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x83,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_le_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_le_f16_e64
+v_cmpx_le_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_le_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_le_i16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_le_i16_e64
+v_cmpx_le_i16 v1, v255
+// GFX11: v_cmpx_le_i16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_le_u16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_le_u16_e64
+v_cmpx_le_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_le_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lg_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_lg_f16_e64
+v_cmpx_le_i16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_le_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_lt_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_lt_f16_e64
+v_cmpx_le_i16 v255, v2
+// GFX11: v_cmpx_le_i16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb3,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_lt_i16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_lt_i16_e64
+v_cmpx_le_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_le_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_lt_u16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_lt_u16_e64
+v_cmpx_le_i16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_le_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ne_i16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ne_i16_e64
+v_cmpx_le_u16 v1, v255
+// GFX11: v_cmpx_le_u16_e64 v1, v255              ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ne_u16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ne_u16_e64
+v_cmpx_le_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_le_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_neq_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_neq_f16_e64
+v_cmpx_le_u16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_le_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_nge_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_nge_f16_e64
+v_cmpx_le_u16 v255, v2
+// GFX11: v_cmpx_le_u16_e64 v255, v2              ; encoding: [0x7e,0x00,0xbb,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ngt_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ngt_f16_e64
+v_cmpx_le_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_le_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_nle_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_nle_f16_e64
+v_cmpx_le_u16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_le_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_nlg_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_nlg_f16_e64
+v_cmpx_lg_f16 v1, v255
+// GFX11: v_cmpx_lg_f16_e64 v1, v255              ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_nlt_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_nlt_f16_e64
+v_cmpx_lg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_lg_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x85,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_o_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_o_f16_e64
+v_cmpx_lg_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_lg_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_t_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_t_f16_e64
+v_cmpx_lg_f16 v255, v2
+// GFX11: v_cmpx_lg_f16_e64 v255, v2              ; encoding: [0x7e,0x00,0x85,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_tru_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_t_f16_e64
+v_cmpx_lg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_lg_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x85,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_u_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_u_f16_e64
+v_cmpx_lg_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_lg_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_class_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_class_f16_e64
+v_cmpx_lt_f16 v1, v255
+// GFX11: v_cmpx_lt_f16_e64 v1, v255              ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_eq_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_eq_f16_e64
+v_cmpx_lt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_lt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_eq_i16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_eq_i16_e64
+v_cmpx_lt_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_lt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_eq_u16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_eq_u16_e64
+v_cmpx_lt_f16 v255, v2
+// GFX11: v_cmpx_lt_f16_e64 v255, v2              ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_f_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_f_f16_e64
+v_cmpx_lt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_lt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_ge_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ge_f16_e64
+v_cmpx_lt_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_lt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ge_i16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ge_i16_e64
+v_cmpx_lt_i16 v1, v255
+// GFX11: v_cmpx_lt_i16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ge_u16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ge_u16_e64
+v_cmpx_lt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_lt_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_gt_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_gt_f16_e64
+v_cmpx_lt_i16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_lt_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_gt_i16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_gt_i16_e64
+v_cmpx_lt_i16 v255, v2
+// GFX11: v_cmpx_lt_i16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb1,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_gt_u16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_gt_u16_e64
+v_cmpx_lt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_lt_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_le_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_le_f16_e64
+v_cmpx_lt_i16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_lt_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_le_i16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_le_i16_e64
+v_cmpx_lt_u16 v1, v255
+// GFX11: v_cmpx_lt_u16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_le_u16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_le_u16_e64
+v_cmpx_lt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_lt_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lg_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_lg_f16_e64
+v_cmpx_lt_u16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_lt_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_lt_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_lt_f16_e64
+v_cmpx_lt_u16 v255, v2
+// GFX11: v_cmpx_lt_u16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb9,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_lt_i16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_lt_i16_e64
+v_cmpx_lt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_lt_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
 v_cmpx_lt_u16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_lt_u16_e64
+// GFX11: v_cmpx_lt_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ne_i16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ne_i16_e64
-
-v_cmpx_ne_u16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ne_u16_e64
+v_cmpx_ne_i16 v1, v255
+// GFX11: v_cmpx_ne_i16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_neq_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_neq_f16_e64
+v_cmpx_ne_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_ne_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_nge_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_nge_f16_e64
+v_cmpx_ne_i16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ne_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_ngt_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_ngt_f16_e64
+v_cmpx_ne_i16 v255, v2
+// GFX11: v_cmpx_ne_i16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb5,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_nle_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_nle_f16_e64
+v_cmpx_ne_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_ne_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_nlg_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_nlg_f16_e64
+v_cmpx_ne_i16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ne_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_nlt_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_nlt_f16_e64
+v_cmpx_ne_u16 v1, v255
+// GFX11: v_cmpx_ne_u16_e64 v1, v255              ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_o_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_o_f16_e64
+v_cmpx_ne_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_ne_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_t_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_t_f16_e64
+v_cmpx_ne_u16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ne_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_tru_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_t_f16_e64
+v_cmpx_ne_u16 v255, v2
+// GFX11: v_cmpx_ne_u16_e64 v255, v2              ; encoding: [0x7e,0x00,0xbd,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_u_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cmpx_u_f16_e64
+v_cmpx_ne_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_ne_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_class_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_class_f16_e64
+v_cmpx_ne_u16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ne_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_eq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_eq_f16_e64
+v_cmpx_neq_f16 v1, v255
+// GFX11: v_cmpx_neq_f16_e64 v1, v255             ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_eq_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_eq_i16_e64
+v_cmpx_neq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_neq_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_eq_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_eq_u16_e64
+v_cmpx_neq_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_neq_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_f_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_f_f16_e64
+v_cmpx_neq_f16 v255, v2
+// GFX11: v_cmpx_neq_f16_e64 v255, v2             ; encoding: [0x7e,0x00,0x8d,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_ge_f16_e64
+v_cmpx_neq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_neq_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_ge_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_ge_i16_e64
+v_cmpx_neq_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_neq_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ge_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_ge_u16_e64
+v_cmpx_nge_f16 v1, v255
+// GFX11: v_cmpx_nge_f16_e64 v1, v255             ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_gt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_gt_f16_e64
+v_cmpx_nge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_nge_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x89,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_gt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_gt_i16_e64
+v_cmpx_nge_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_nge_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_gt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_gt_u16_e64
+v_cmpx_nge_f16 v255, v2
+// GFX11: v_cmpx_nge_f16_e64 v255, v2             ; encoding: [0x7e,0x00,0x89,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_le_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_le_f16_e64
+v_cmpx_nge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_nge_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x89,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_le_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_le_i16_e64
+v_cmpx_nge_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_nge_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_le_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_le_u16_e64
+v_cmpx_ngt_f16 v1, v255
+// GFX11: v_cmpx_ngt_f16_e64 v1, v255             ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_lg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_lg_f16_e64
+v_cmpx_ngt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_ngt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_lt_f16_e64
+v_cmpx_ngt_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ngt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_lt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_lt_i16_e64
+v_cmpx_ngt_f16 v255, v2
+// GFX11: v_cmpx_ngt_f16_e64 v255, v2             ; encoding: [0x7e,0x00,0x8b,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_lt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_lt_u16_e64
+v_cmpx_ngt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_ngt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8b,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_ne_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_ne_i16_e64
+v_cmpx_ngt_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_ngt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ne_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_ne_u16_e64
+v_cmpx_nle_f16 v1, v255
+// GFX11: v_cmpx_nle_f16_e64 v1, v255             ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_neq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_neq_f16_e64
+v_cmpx_nle_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_nle_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_nge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_nge_f16_e64
+v_cmpx_nle_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_nle_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_ngt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_ngt_f16_e64
+v_cmpx_nle_f16 v255, v2
+// GFX11: v_cmpx_nle_f16_e64 v255, v2             ; encoding: [0x7e,0x00,0x8c,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_nle_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_nle_f16_e64
+v_cmpx_nle_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_nle_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8c,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_nlg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_nlg_f16_e64
+v_cmpx_nle_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_nle_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_nlt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_nlt_f16_e64
+v_cmpx_nlg_f16 v1, v255
+// GFX11: v_cmpx_nlg_f16_e64 v1, v255             ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_o_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_o_f16_e64
+v_cmpx_nlg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_nlg_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_t_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_t_f16_e64
+v_cmpx_nlg_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_nlg_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_tru_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_t_f16_e64
+v_cmpx_nlg_f16 v255, v2
+// GFX11: v_cmpx_nlg_f16_e64 v255, v2             ; encoding: [0x7e,0x00,0x8a,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_u_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_u_f16_e64
+v_cmpx_nlg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_nlg_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8a,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_class_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_class_f16_e64
+v_cmpx_nlg_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_nlg_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_eq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_eq_f16_e64
+v_cmpx_nlt_f16 v1, v255
+// GFX11: v_cmpx_nlt_f16_e64 v1, v255             ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_eq_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_eq_i16_e64
+v_cmpx_nlt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_nlt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_eq_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_eq_u16_e64
+v_cmpx_nlt_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_nlt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_f_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_f_f16_e64
+v_cmpx_nlt_f16 v255, v2
+// GFX11: v_cmpx_nlt_f16_e64 v255, v2             ; encoding: [0x7e,0x00,0x8e,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_ge_f16_e64
+v_cmpx_nlt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_nlt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8e,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_ge_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_ge_i16_e64
+v_cmpx_nlt_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_nlt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ge_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_ge_u16_e64
+v_cmpx_o_f16 v1, v255
+// GFX11: v_cmpx_o_f16_e64 v1, v255               ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_gt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_gt_f16_e64
+v_cmpx_o_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_o_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x87,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_gt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_gt_i16_e64
+v_cmpx_o_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_o_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_gt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_gt_u16_e64
+v_cmpx_o_f16 v255, v2
+// GFX11: v_cmpx_o_f16_e64 v255, v2               ; encoding: [0x7e,0x00,0x87,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_le_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_le_f16_e64
+v_cmpx_o_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_o_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x87,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_le_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_le_i16_e64
+v_cmpx_o_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_o_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_le_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_le_u16_e64
+v_cmpx_t_f16 v1, v255
+// GFX11: v_cmpx_t_f16_e64 v1, v255               ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_lg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_lg_f16_e64
+v_cmpx_t_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_t_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_lt_f16_e64
+v_cmpx_t_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_t_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_lt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_lt_i16_e64
+v_cmpx_t_f16 v255, v2
+// GFX11: v_cmpx_t_f16_e64 v255, v2               ; encoding: [0x7e,0x00,0x8f,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_lt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_lt_u16_e64
+v_cmpx_t_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_t_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8f,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_ne_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_ne_i16_e64
+v_cmpx_t_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_t_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8f,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ne_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_ne_u16_e64
+v_cmpx_tru_f16 v1, v255
+// GFX11: v_cmpx_t_f16_e64 v1, v255               ; encoding: [0x7e,0x00,0x8f,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_neq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_neq_f16_e64
+v_cmpx_tru_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_t_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8f,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_nge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_nge_f16_e64
+v_cmpx_tru_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_t_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8f,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_ngt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_ngt_f16_e64
+v_cmpx_tru_f16 v255, v2
+// GFX11: v_cmpx_t_f16_e64 v255, v2               ; encoding: [0x7e,0x00,0x8f,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_nle_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_nle_f16_e64
+v_cmpx_tru_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_t_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8f,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_nlg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_nlg_f16_e64
+v_cmpx_tru_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_t_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8f,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_nlt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_nlt_f16_e64
+v_cmpx_u_f16 v1, v255
+// GFX11: v_cmpx_u_f16_e64 v1, v255               ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_o_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_o_f16_e64
+v_cmpx_u_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cmpx_u_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x88,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_t_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_t_f16_e64
+v_cmpx_u_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_u_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_tru_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_t_f16_e64
+v_cmpx_u_f16 v255, v2
+// GFX11: v_cmpx_u_f16_e64 v255, v2               ; encoding: [0x7e,0x00,0x88,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_u_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cmpx_u_f16_e64
+// GFX11: v_cmpx_u_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x88,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
+v_cmpx_u_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX11: v_cmpx_u_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s b/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s
index 39010883a3c0b83..8bf9b92e8d1d8d7 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s
@@ -155,6 +155,15 @@ image_load v[0:2], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY th:TH_LO
 image_load v[0:2], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY th:TH_LOAD_HT scope:SCOPE_SE r128 a16 tfe d16
 // GFX12: encoding: [0x75,0x00,0xc0,0xd3,0x00,0x10,0xa4,0x00,0x04,0x05,0x00,0x00]
 
+image_load v[4:7], [v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+// GFX12: encoding: [0x01,0x00,0xc0,0xd3,0x04,0x08,0x00,0x00,0x01,0x00,0x00,0x00]
+
+image_load v[1:4], [v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+// GFX12: encoding: [0x02,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00]
+
+image_load v[1:4], [v3, v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
+// GFX12: encoding: [0x07,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00]
+
 image_load_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
 // GFX12: encoding: [0x00,0x40,0xc0,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00]
 
@@ -402,6 +411,15 @@ image_store v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_STORE_NT_WB
 image_store v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_STORE_BYPASS scope:SCOPE_SYS
 // GFX12: encoding: [0x00,0x80,0x41,0xd0,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
 
+image_store v[1:4], [v2, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D
+// GFX12: encoding: [0x01,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00]
+
+image_store v[1:4], [v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_3D
+// GFX12: encoding: [0x02,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00]
+
+image_store v[1:4], [v3, v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
+// GFX12: encoding: [0x07,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00]
+
 image_store_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
 // GFX12: encoding: [0x00,0xc0,0xc1,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00]
 
@@ -559,6 +577,15 @@ image_atomic_swap v[3:4], [v4, v5], s[8:15] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA a1
 image_atomic_swap v[254:255], [v4, v5], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY a16
 // GFX12: encoding: [0x47,0x80,0xc2,0xd0,0xfe,0xc0,0x00,0x00,0x04,0x05,0x00,0x00]
 
+image_atomic_swap v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
+// GFX12: encoding: [0x01,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00]
+
+image_atomic_swap v1, [v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_3D
+// GFX12: encoding: [0x02,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00]
+
+image_atomic_swap v1, [v3, v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
+// GFX12: encoding: [0x07,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00]
+
 image_atomic_cmpswap v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
 // GFX12: encoding: [0x00,0xc0,0xc2,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 
@@ -613,6 +640,15 @@ image_atomic_add_uint v[254:255], [v4, v5, v6, v7], s[96:103] dmask:0x3 dim:SQ_R
 image_atomic_add_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
 // GFX12: encoding: [0x00,0x00,0x43,0xd0,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00]
 
+image_atomic_add_uint v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
+// GFX12: encoding: [0x01,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00]
+
+image_atomic_add_uint v1, [v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_3D
+// GFX12: encoding: [0x02,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00]
+
+image_atomic_add_uint v1, [v3, v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
+// GFX12: encoding: [0x07,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00]
+
 image_atomic_sub_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
 // GFX12: encoding: [0x00,0x40,0x43,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c-fake16.s
new file mode 100644
index 000000000000000..76db94023fc903b
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c-fake16.s
@@ -0,0 +1,8695 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+
+v_cmp_class_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, v255, v2
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, s1, v2
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, s105, v255
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, vcc_lo, s2
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, vcc_hi, s105
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x6b,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, ttmp15, ttmp15
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7b,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, m0, src_scc
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7d,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x7d,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x7d,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x7d,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x7d,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x7d,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], v255, v2
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0xff,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], s1, v2
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x01,0x04,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], s105, v255
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x69,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], vcc_lo, s2
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x6a,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], vcc_hi, s105
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x6b,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], ttmp15, ttmp15
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7b,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], m0, src_scc
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7d,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x7d,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x7d,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x7d,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x7d,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f16_e64 null, -|0xfe0b|, vcc_hi
+// GFX12: encoding: [0x7c,0x01,0x7d,0xd4,0xff,0xd6,0x00,0x20,0x0b,0xfe,0x00,0x00]
+
+v_cmp_class_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x7e,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x7e,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x7e,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x7e,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x7e,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x7e,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x7e,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x7e,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x7e,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f32_e64 null, -|0xaf123456|, vcc_hi
+// GFX12: encoding: [0x7c,0x01,0x7e,0xd4,0xff,0xd6,0x00,0x20,0x56,0x34,0x12,0xaf]
+
+v_cmp_class_f64_e64 s5, v[1:2], v2
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, v[1:2], v255
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, v[1:2], s2
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0x05,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, v[1:2], s105
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x01,0xd3,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, v[254:255], ttmp15
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0xfe,0xf7,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, s[2:3], vcc_hi
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x02,0xd6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, s[104:105], vcc_lo
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x68,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, vcc, m0
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x6a,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, ttmp[14:15], exec_hi
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x7a,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s5, exec, exec_lo
+// W32: encoding: [0x05,0x00,0x7f,0xd4,0x7e,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s105, null, null
+// W32: encoding: [0x69,0x00,0x7f,0xd4,0x7c,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x7f,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 vcc_hi, 0.5, 0.5
+// W32: encoding: [0x6b,0x00,0x7f,0xd4,0xf0,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 ttmp15, -|src_scc|, src_scc
+// W32: encoding: [0x7b,0x01,0x7f,0xd4,0xfd,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], v[1:2], v2
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], v[1:2], v255
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], v[1:2], s2
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0x05,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], v[1:2], s105
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x01,0xd3,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], v[254:255], ttmp15
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0xfe,0xf7,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], s[2:3], vcc_hi
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x02,0xd6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], s[104:105], vcc_lo
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x68,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], vcc, m0
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x6a,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], ttmp[14:15], exec_hi
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x7a,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], exec, exec_lo
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x7e,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[10:11], null, null
+// W64: encoding: [0x0a,0x00,0x7f,0xd4,0x7c,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x7f,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 vcc, 0.5, 0.5
+// W64: encoding: [0x6a,0x00,0x7f,0xd4,0xf0,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 ttmp[14:15], -|src_scc|, src_scc
+// W64: encoding: [0x7a,0x01,0x7f,0xd4,0xfd,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_class_f64_e64 null, 0xaf123456, 0xaf123456
+// GFX12: encoding: [0x7c,0x00,0x7f,0xd4,0xff,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_eq_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x02,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x02,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x02,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x02,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x02,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x02,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x02,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x02,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x02,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x02,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x02,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x02,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_eq_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x12,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x12,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x12,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x12,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x12,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x12,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x12,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x12,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x12,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x12,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x12,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x12,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x12,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_eq_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x22,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x22,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x22,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x22,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x22,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x22,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x22,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x22,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x22,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x22,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x22,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x22,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x22,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x22,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x22,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x22,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x22,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_eq_i16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x32,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x32,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x32,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x32,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x32,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x32,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x32,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x32,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x32,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x32,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64 null, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x32,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_eq_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x42,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x42,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x42,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x42,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x42,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x42,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x42,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x42,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x42,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x42,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64 null, 0xaf123456, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x42,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_eq_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x52,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x52,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x52,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x52,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x52,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x52,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x52,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x52,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x52,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x52,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x52,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x52,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x52,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x52,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x52,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i64_e64 null, 0xaf123456, vcc
+// GFX12: encoding: [0x7c,0x00,0x52,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_eq_u16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x3a,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x3a,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x3a,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x3a,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x3a,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x3a,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x3a,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x3a,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x3a,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64 null, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x3a,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_eq_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x4a,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x4a,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x4a,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x4a,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x4a,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x4a,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x4a,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x4a,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x4a,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64 null, 0xaf123456, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x4a,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_eq_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x5a,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x5a,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x5a,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x5a,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x5a,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x5a,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x5a,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x5a,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x5a,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u64_e64 null, 0xaf123456, vcc
+// GFX12: encoding: [0x7c,0x00,0x5a,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ge_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x06,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x06,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x06,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x06,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x06,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x06,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x06,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x06,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x06,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x06,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x06,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x06,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_ge_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x16,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x16,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x16,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x16,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x16,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x16,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x16,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x16,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x16,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x16,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x16,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x16,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x16,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_ge_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x26,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x26,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x26,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x26,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x26,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x26,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x26,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x26,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x26,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x26,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x26,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x26,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x26,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x26,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x26,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x26,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x26,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_ge_i16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x36,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x36,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x36,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x36,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x36,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x36,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x36,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x36,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x36,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x36,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i16_e64 null, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x36,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_ge_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x46,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x46,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x46,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x46,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x46,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x46,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x46,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x46,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x46,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x46,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i32_e64 null, 0xaf123456, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x46,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ge_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x56,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x56,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x56,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x56,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x56,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x56,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x56,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x56,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x56,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x56,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x56,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x56,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x56,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x56,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x56,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_i64_e64 null, 0xaf123456, vcc
+// GFX12: encoding: [0x7c,0x00,0x56,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ge_u16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x3e,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x3e,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x3e,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x3e,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x3e,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x3e,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x3e,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x3e,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x3e,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u16_e64 null, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x3e,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_ge_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x4e,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x4e,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x4e,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x4e,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x4e,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x4e,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x4e,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x4e,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x4e,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u32_e64 null, 0xaf123456, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x4e,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ge_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x5e,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x5e,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x5e,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x5e,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x5e,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x5e,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x5e,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x5e,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x5e,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ge_u64_e64 null, 0xaf123456, vcc
+// GFX12: encoding: [0x7c,0x00,0x5e,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_gt_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x04,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x04,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x04,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x04,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x04,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x04,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x04,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x04,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x04,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x04,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x04,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x04,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_gt_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x14,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x14,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x14,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x14,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x14,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x14,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x14,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x14,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x14,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x14,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x14,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x14,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x14,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_gt_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x24,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x24,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x24,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x24,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x24,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x24,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x24,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x24,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x24,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x24,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x24,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x24,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x24,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x24,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x24,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x24,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x24,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_gt_i16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x34,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x34,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x34,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x34,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x34,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x34,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x34,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x34,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x34,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x34,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i16_e64 null, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x34,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_gt_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x44,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x44,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x44,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x44,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x44,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x44,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x44,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x44,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x44,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x44,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i32_e64 null, 0xaf123456, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x44,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_gt_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x54,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x54,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x54,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x54,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x54,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x54,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x54,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x54,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x54,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x54,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x54,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x54,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x54,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x54,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x54,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_i64_e64 null, 0xaf123456, vcc
+// GFX12: encoding: [0x7c,0x00,0x54,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_gt_u16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x3c,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x3c,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x3c,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x3c,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x3c,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x3c,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x3c,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x3c,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x3c,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u16_e64 null, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x3c,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_gt_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x4c,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x4c,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x4c,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x4c,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x4c,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x4c,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x4c,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x4c,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x4c,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u32_e64 null, 0xaf123456, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x4c,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_gt_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x5c,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x5c,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x5c,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x5c,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x5c,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x5c,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x5c,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x5c,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x5c,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_gt_u64_e64 null, 0xaf123456, vcc
+// GFX12: encoding: [0x7c,0x00,0x5c,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_le_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x03,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x03,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x03,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x03,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x03,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x03,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x03,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x03,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x03,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x03,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x03,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x03,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_le_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x13,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x13,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x13,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x13,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x13,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x13,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x13,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x13,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x13,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x13,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x13,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x13,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x13,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_le_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x23,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x23,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x23,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x23,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x23,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x23,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x23,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x23,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x23,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x23,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x23,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x23,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x23,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x23,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x23,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x23,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x23,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_le_i16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x33,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x33,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x33,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x33,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x33,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x33,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x33,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x33,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x33,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x33,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i16_e64 null, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x33,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_le_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x43,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x43,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x43,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x43,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x43,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x43,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x43,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x43,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x43,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x43,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i32_e64 null, 0xaf123456, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x43,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_le_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x53,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x53,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x53,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x53,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x53,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x53,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x53,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x53,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x53,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x53,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x53,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x53,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x53,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x53,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x53,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_i64_e64 null, 0xaf123456, vcc
+// GFX12: encoding: [0x7c,0x00,0x53,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_le_u16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x3b,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x3b,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x3b,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x3b,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x3b,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x3b,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x3b,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x3b,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x3b,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u16_e64 null, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x3b,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_le_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x4b,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x4b,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x4b,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x4b,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x4b,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x4b,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x4b,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x4b,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x4b,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u32_e64 null, 0xaf123456, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x4b,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_le_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x5b,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x5b,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x5b,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x5b,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x5b,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x5b,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x5b,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x5b,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x5b,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_le_u64_e64 null, 0xaf123456, vcc
+// GFX12: encoding: [0x7c,0x00,0x5b,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_lg_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x05,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x05,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x05,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x05,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x05,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x05,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x05,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x05,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x05,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x05,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x05,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x05,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_lg_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x15,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x15,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x15,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x15,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x15,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x15,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x15,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x15,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x15,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x15,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x15,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x15,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x15,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_lg_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x25,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x25,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x25,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x25,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x25,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x25,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x25,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x25,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x25,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x25,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x25,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x25,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x25,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x25,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x25,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x25,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lg_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x25,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_lt_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x01,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x01,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x01,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x01,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x01,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x01,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x01,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x01,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x01,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x01,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x01,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x01,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x01,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_lt_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x11,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x11,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x11,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x11,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x11,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x11,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x11,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x11,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x11,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x11,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x11,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x11,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x11,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_lt_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x21,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x21,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x21,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x21,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x21,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x21,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x21,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x21,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x21,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x21,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x21,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x21,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x21,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x21,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x21,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x21,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x21,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_lt_i16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x31,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x31,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x31,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x31,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x31,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x31,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x31,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x31,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x31,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x31,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i16_e64 null, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x31,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_lt_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x41,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x41,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x41,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x41,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x41,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x41,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x41,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x41,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x41,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x41,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i32_e64 null, 0xaf123456, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x41,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_lt_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x51,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x51,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x51,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x51,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x51,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x51,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x51,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x51,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x51,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x51,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x51,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x51,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x51,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x51,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x51,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_i64_e64 null, 0xaf123456, vcc
+// GFX12: encoding: [0x7c,0x00,0x51,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_lt_u16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x39,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x39,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x39,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x39,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x39,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x39,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x39,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x39,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x39,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x39,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u16_e64 null, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x39,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_lt_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x49,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x49,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x49,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x49,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x49,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x49,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x49,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x49,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x49,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x49,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u32_e64 null, 0xaf123456, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x49,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_lt_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x59,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x59,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x59,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x59,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x59,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x59,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x59,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x59,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x59,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x59,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x59,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x59,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x59,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x59,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x59,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_lt_u64_e64 null, 0xaf123456, vcc
+// GFX12: encoding: [0x7c,0x00,0x59,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ne_i16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x35,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x35,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x35,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x35,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x35,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x35,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x35,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x35,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x35,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x35,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i16_e64 null, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x35,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_ne_i32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x45,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x45,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x45,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x45,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x45,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x45,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x45,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x45,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x45,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x45,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i32_e64 null, 0xaf123456, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x45,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ne_i64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x55,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x55,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x55,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x55,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x55,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x55,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x55,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x55,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x55,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x55,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x55,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x55,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x55,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x55,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x55,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_i64_e64 null, 0xaf123456, vcc
+// GFX12: encoding: [0x7c,0x00,0x55,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ne_u16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x3d,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x3d,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x3d,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x3d,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x3d,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x3d,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x3d,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x3d,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x3d,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u16_e64 null, 0xfe0b, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x3d,0xd4,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
+
+v_cmp_ne_u32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s5, exec_hi, null
+// W32: encoding: [0x05,0x00,0x4d,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x4d,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x4d,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 vcc_hi, 0.5, m0
+// W32: encoding: [0x6b,0x00,0x4d,0xd4,0xf0,0xfa,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 ttmp15, src_scc, vcc_lo
+// W32: encoding: [0x7b,0x00,0x4d,0xd4,0xfd,0xd4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], exec_hi, null
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x4d,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x4d,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 vcc, 0.5, m0
+// W64: encoding: [0x6a,0x00,0x4d,0xd4,0xf0,0xfa,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 ttmp[14:15], src_scc, vcc_lo
+// W64: encoding: [0x7a,0x00,0x4d,0xd4,0xfd,0xd4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u32_e64 null, 0xaf123456, vcc_hi
+// GFX12: encoding: [0x7c,0x00,0x4d,0xd4,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_ne_u64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s5, exec, src_scc
+// W32: encoding: [0x05,0x00,0x5d,0xd4,0x7e,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x5d,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x5d,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x5d,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 ttmp15, src_scc, exec
+// W32: encoding: [0x7b,0x00,0x5d,0xd4,0xfd,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], exec, src_scc
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x7e,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x5d,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x5d,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x5d,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 ttmp[14:15], src_scc, exec
+// W64: encoding: [0x7a,0x00,0x5d,0xd4,0xfd,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ne_u64_e64 null, 0xaf123456, vcc
+// GFX12: encoding: [0x7c,0x00,0x5d,0xd4,0xff,0xd4,0x00,0x00,0x56,0x34,0x12,0xaf]
+
+v_cmp_neq_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x0d,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x0d,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x0d,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x0d,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x0d,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x0d,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x0d,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x0d,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x0d,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x0d,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x0d,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x0d,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_neq_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x1d,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x1d,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x1d,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x1d,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x1d,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x1d,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x1d,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x1d,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x1d,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x1d,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x1d,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x1d,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_neq_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x2d,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x2d,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x2d,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x2d,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x2d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x2d,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x2d,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x2d,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x2d,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x2d,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x2d,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x2d,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x2d,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x2d,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x2d,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_neq_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x2d,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_nge_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x09,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x09,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x09,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x09,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x09,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x09,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x09,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x09,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x09,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x09,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x09,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x09,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_nge_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x19,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x19,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x19,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x19,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x19,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x19,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x19,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x19,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x19,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x19,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x19,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x19,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x19,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_nge_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x29,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x29,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x29,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x29,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x29,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x29,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x29,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x29,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x29,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x29,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x29,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x29,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x29,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x29,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x29,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x29,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nge_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x29,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_ngt_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x0b,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x0b,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x0b,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x0b,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x0b,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x0b,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x0b,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x0b,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x0b,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x0b,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x0b,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x0b,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_ngt_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x1b,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x1b,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x1b,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x1b,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x1b,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x1b,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x1b,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x1b,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x1b,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x1b,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x1b,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x1b,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_ngt_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x2b,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x2b,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x2b,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x2b,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x2b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x2b,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x2b,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x2b,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x2b,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x2b,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x2b,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x2b,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x2b,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x2b,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x2b,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_ngt_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x2b,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_nle_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x0c,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x0c,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x0c,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x0c,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x0c,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x0c,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x0c,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x0c,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x0c,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x0c,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x0c,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x0c,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_nle_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x1c,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x1c,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x1c,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x1c,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x1c,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x1c,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x1c,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x1c,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x1c,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x1c,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x1c,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x1c,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_nle_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x2c,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x2c,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x2c,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x2c,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x2c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x2c,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x2c,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x2c,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x2c,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x2c,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x2c,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x2c,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x2c,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x2c,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x2c,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nle_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x2c,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_nlg_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x0a,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x0a,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x0a,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x0a,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x0a,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x0a,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x0a,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x0a,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x0a,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x0a,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x0a,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x0a,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_nlg_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x1a,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x1a,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x1a,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x1a,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x1a,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x1a,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x1a,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x1a,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x1a,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x1a,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x1a,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x1a,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_nlg_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x2a,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x2a,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x2a,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x2a,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x2a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x2a,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x2a,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x2a,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x2a,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x2a,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x2a,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x2a,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x2a,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x2a,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x2a,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlg_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x2a,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_nlt_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x0e,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x0e,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x0e,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x0e,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x0e,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x0e,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x0e,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x0e,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x0e,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x0e,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x0e,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x0e,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_nlt_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x1e,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x1e,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x1e,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x1e,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x1e,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x1e,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x1e,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x1e,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x1e,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x1e,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x1e,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x1e,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_nlt_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x2e,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x2e,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x2e,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x2e,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x2e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x2e,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x2e,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x2e,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x2e,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x2e,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x2e,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x2e,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x2e,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x2e,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x2e,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_nlt_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x2e,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_o_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x07,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x07,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x07,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x07,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x07,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x07,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x07,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x07,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x07,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x07,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x07,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x07,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_o_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x17,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x17,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x17,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x17,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x17,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x17,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x17,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x17,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x17,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x17,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x17,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x17,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x17,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_o_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x27,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x27,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x27,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x27,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x27,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x27,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x27,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x27,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x27,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x27,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x27,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x27,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x27,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x27,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x27,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x27,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_o_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x27,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
+
+v_cmp_u_f16_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, vcc_hi, 0xfe0b
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x08,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x08,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x08,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x08,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x08,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x08,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], vcc_hi, 0xfe0b
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x6b,0xfe,0x01,0x00,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x08,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x08,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x08,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x08,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x08,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f16_e64 null, -|0xfe0b|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x08,0xd4,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cmp_u_f32_e64 s5, v1, v2
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, v255, v255
+// W32: encoding: [0x05,0x00,0x18,0xd4,0xff,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, s1, s2
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x01,0x04,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, s105, s105
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x69,0xd2,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, vcc_lo, ttmp15
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x6a,0xf6,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, vcc_hi, 0xaf123456
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, ttmp15, src_scc
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x7b,0xfa,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, m0, 0.5
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x7d,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, exec_lo, -1
+// W32: encoding: [0x05,0x00,0x18,0xd4,0x7e,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s5, |exec_hi|, null
+// W32: encoding: [0x05,0x01,0x18,0xd4,0x7f,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s105, null, exec_lo
+// W32: encoding: [0x69,0x00,0x18,0xd4,0x7c,0xfc,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 vcc_lo, -1, exec_hi
+// W32: encoding: [0x6a,0x00,0x18,0xd4,0xc1,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 vcc_hi, 0.5, -m0
+// W32: encoding: [0x6b,0x00,0x18,0xd4,0xf0,0xfa,0x00,0x40]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 ttmp15, -src_scc, |vcc_lo|
+// W32: encoding: [0x7b,0x02,0x18,0xd4,0xfd,0xd4,0x00,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], v1, v2
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], v255, v255
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0xff,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], s1, s2
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x01,0x04,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], s105, s105
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x69,0xd2,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], vcc_lo, ttmp15
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x6a,0xf6,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], vcc_hi, 0xaf123456
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x6b,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], ttmp15, src_scc
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7b,0xfa,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], m0, 0.5
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7d,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], exec_lo, -1
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7e,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], |exec_hi|, null
+// W64: encoding: [0x0a,0x01,0x18,0xd4,0x7f,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[10:11], null, exec_lo
+// W64: encoding: [0x0a,0x00,0x18,0xd4,0x7c,0xfc,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 s[104:105], -1, exec_hi
+// W64: encoding: [0x68,0x00,0x18,0xd4,0xc1,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 vcc, 0.5, -m0
+// W64: encoding: [0x6a,0x00,0x18,0xd4,0xf0,0xfa,0x00,0x40]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 ttmp[14:15], -src_scc, |vcc_lo|
+// W64: encoding: [0x7a,0x02,0x18,0xd4,0xfd,0xd4,0x00,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f32_e64 null, -|0xaf123456|, -|vcc_hi| clamp
+// GFX12: encoding: [0x7c,0x83,0x18,0xd4,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
+
+v_cmp_u_f64_e64 s5, v[1:2], v[2:3]
+// W32: encoding: [0x05,0x00,0x28,0xd4,0x01,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s5, v[254:255], v[254:255]
+// W32: encoding: [0x05,0x00,0x28,0xd4,0xfe,0xfd,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s5, s[2:3], s[4:5]
+// W32: encoding: [0x05,0x00,0x28,0xd4,0x02,0x08,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s5, s[104:105], s[104:105]
+// W32: encoding: [0x05,0x00,0x28,0xd4,0x68,0xd0,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s5, vcc, ttmp[14:15]
+// W32: encoding: [0x05,0x00,0x28,0xd4,0x6a,0xf4,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s5, ttmp[14:15], 0xaf123456
+// W32: encoding: [0x05,0x00,0x28,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s5, -|exec|, src_scc
+// W32: encoding: [0x05,0x01,0x28,0xd4,0x7e,0xfa,0x01,0x20]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s105, null, 0.5
+// W32: encoding: [0x69,0x00,0x28,0xd4,0x7c,0xe0,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 vcc_lo, -1, -1
+// W32: encoding: [0x6a,0x00,0x28,0xd4,0xc1,0x82,0x01,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 vcc_hi, 0.5, null
+// W32: encoding: [0x6b,0x00,0x28,0xd4,0xf0,0xf8,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 ttmp15, -|src_scc|, -|exec|
+// W32: encoding: [0x7b,0x03,0x28,0xd4,0xfd,0xfc,0x00,0x60]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], v[1:2], v[2:3]
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0x01,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], v[254:255], v[254:255]
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0xfe,0xfd,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], s[2:3], s[4:5]
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0x02,0x08,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], s[104:105], s[104:105]
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0x68,0xd0,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], vcc, ttmp[14:15]
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0x6a,0xf4,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], ttmp[14:15], 0xaf123456
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0x7a,0xfe,0x01,0x00,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], -|exec|, src_scc
+// W64: encoding: [0x0a,0x01,0x28,0xd4,0x7e,0xfa,0x01,0x20]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[10:11], null, 0.5
+// W64: encoding: [0x0a,0x00,0x28,0xd4,0x7c,0xe0,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 s[104:105], -1, -1
+// W64: encoding: [0x68,0x00,0x28,0xd4,0xc1,0x82,0x01,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 vcc, 0.5, null
+// W64: encoding: [0x6a,0x00,0x28,0xd4,0xf0,0xf8,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 ttmp[14:15], -|src_scc|, -|exec|
+// W64: encoding: [0x7a,0x03,0x28,0xd4,0xfd,0xfc,0x00,0x60]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_u_f64_e64 null, 0xaf123456, -|vcc| clamp
+// GFX12: encoding: [0x7c,0x82,0x28,0xd4,0xff,0xd4,0x00,0x40,0x56,0x34,0x12,0xaf]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s
index 98dbbf6cff448b5..76db94023fc903b 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_cmp_class_f16_e64 s5, v1, v2
 // W32: encoding: [0x05,0x00,0x7d,0xd4,0x01,0x05,0x02,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc-fake16.s
new file mode 100644
index 000000000000000..ebac9fed92503ee
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc-fake16.s
@@ -0,0 +1,9076 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+
+v_cmp_class_f16_e32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0xfa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0xfa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0xfa,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0xfc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0xfd,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0xfc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0xfd,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, v[1:2], v2
+// W32: encoding: [0x01,0x05,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, v[254:255], v2
+// W32: encoding: [0xfe,0x05,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, s[2:3], v2
+// W32: encoding: [0x02,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, s[104:105], v2
+// W32: encoding: [0x68,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, vcc, v2
+// W32: encoding: [0x6a,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, ttmp[14:15], v2
+// W32: encoding: [0x7a,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, exec, v2
+// W32: encoding: [0x7e,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0xfe,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0xff,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, v[1:2], v2
+// W64: encoding: [0x01,0x05,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, v[254:255], v2
+// W64: encoding: [0xfe,0x05,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, s[2:3], v2
+// W64: encoding: [0x02,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, s[104:105], v2
+// W64: encoding: [0x68,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, vcc, v2
+// W64: encoding: [0x6a,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, ttmp[14:15], v2
+// W64: encoding: [0x7a,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, exec, v2
+// W64: encoding: [0x7e,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0xfe,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f64 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0xff,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x04,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x04,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x04,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x24,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x25,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x24,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x25,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x44,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x45,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x44,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x45,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x64,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x64,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x64,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x64,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x84,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x85,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x84,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x85,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xa4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xa5,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xa4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xa5,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x74,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x74,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x74,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x74,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x94,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x95,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x94,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x95,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xb4,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xb5,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xb4,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xb5,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x0c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x0c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x0c,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x2c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x2d,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x2c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x2d,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x4c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x4d,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x4c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x4d,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x6c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x6c,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x6c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x6c,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x8c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x8d,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x8c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x8d,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xac,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xad,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xac,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xad,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x7c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x7c,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x7c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x7c,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x9c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x9d,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x9c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x9d,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xbc,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xbd,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xbc,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xbd,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x08,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x08,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x08,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x28,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x29,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x28,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x29,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x48,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x49,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x48,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x49,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x68,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x68,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x68,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x68,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x88,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x89,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x88,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x89,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xa8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xa9,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xa8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xa9,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x78,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x78,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x78,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x78,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x98,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x99,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x98,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x99,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xb8,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xb9,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xb8,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xb9,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x06,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x06,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x06,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x26,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x27,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x26,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x27,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x46,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x47,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x46,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x47,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x66,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x66,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x66,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x66,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x86,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x87,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x86,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x87,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xa6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xa7,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xa6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xa7,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x76,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x76,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x76,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x76,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x96,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x97,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x96,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x97,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xb6,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xb7,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xb6,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xb7,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x0a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x0a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x0a,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x2a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x2b,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x2a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x2b,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x4a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x4b,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x4a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x4b,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x02,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x02,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x02,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x02,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x22,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x23,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x22,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x23,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x42,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x43,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x42,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x43,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x62,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x62,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x62,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x62,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x82,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x83,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x82,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x83,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xa2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xa3,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xa2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xa3,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x72,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x72,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x72,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x72,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x92,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x93,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x92,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x93,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xb2,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xb3,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xb2,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xb3,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x6a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x6a,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x6a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x6a,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x8a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x8b,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x8a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x8b,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xaa,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xab,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xaa,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xab,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x7a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x7a,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x7a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x7a,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x9a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x9b,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x9a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x9b,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0xba,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0xbb,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0xba,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0xbb,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x1a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x1a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x1a,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x3a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x3b,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x3a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x3b,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x5a,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x5b,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x5a,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x5b,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x12,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x12,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x12,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x32,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x33,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x32,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x33,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x52,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x53,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x52,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x53,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x16,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x16,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x16,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x36,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x37,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x36,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x37,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x56,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x57,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x56,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x57,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x18,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x18,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x18,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x38,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x39,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x38,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x39,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x58,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x59,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x58,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x59,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x14,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x14,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x14,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x34,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x35,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x34,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x35,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x54,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x55,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x54,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x55,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x1c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x1c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x1c,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x3c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x3d,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x3c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x3d,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x5c,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x5d,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x5c,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x5d,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x0e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x0e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x0e,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x2e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x2f,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x2e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x2f,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x4e,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x4f,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x4e,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x4f,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v127, v2
+// W32: encoding: [0x7f,0x05,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x10,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, 0xfe0b, v127
+// W32: encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v127, v2
+// W64: encoding: [0x7f,0x05,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x10,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, 0xfe0b, v127
+// W64: encoding: [0xff,0xfe,0x10,0x7c,0x0b,0xfe,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2
+// W32: encoding: [0x01,0x05,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v255, v2
+// W32: encoding: [0xff,0x05,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, s1, v2
+// W32: encoding: [0x01,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, s105, v2
+// W32: encoding: [0x69,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, vcc_lo, v2
+// W32: encoding: [0x6a,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, vcc_hi, v2
+// W32: encoding: [0x6b,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, ttmp15, v2
+// W32: encoding: [0x7b,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, m0, v2
+// W32: encoding: [0x7d,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, exec_lo, v2
+// W32: encoding: [0x7e,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, exec_hi, v2
+// W32: encoding: [0x7f,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, null, v2
+// W32: encoding: [0x7c,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, -1, v2
+// W32: encoding: [0xc1,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, 0.5, v2
+// W32: encoding: [0xf0,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, src_scc, v2
+// W32: encoding: [0xfd,0x04,0x30,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, 0xaf123456, v255
+// W32: encoding: [0xff,0xfe,0x31,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2
+// W64: encoding: [0x01,0x05,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v255, v2
+// W64: encoding: [0xff,0x05,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, s1, v2
+// W64: encoding: [0x01,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, s105, v2
+// W64: encoding: [0x69,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, vcc_lo, v2
+// W64: encoding: [0x6a,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, vcc_hi, v2
+// W64: encoding: [0x6b,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, ttmp15, v2
+// W64: encoding: [0x7b,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, m0, v2
+// W64: encoding: [0x7d,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, exec_lo, v2
+// W64: encoding: [0x7e,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, exec_hi, v2
+// W64: encoding: [0x7f,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, null, v2
+// W64: encoding: [0x7c,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, -1, v2
+// W64: encoding: [0xc1,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, 0.5, v2
+// W64: encoding: [0xf0,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, src_scc, v2
+// W64: encoding: [0xfd,0x04,0x30,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, 0xaf123456, v255
+// W64: encoding: [0xff,0xfe,0x31,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, v[1:2], v[2:3]
+// W32: encoding: [0x01,0x05,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, v[254:255], v[2:3]
+// W32: encoding: [0xfe,0x05,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, s[2:3], v[2:3]
+// W32: encoding: [0x02,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, s[104:105], v[2:3]
+// W32: encoding: [0x68,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, vcc, v[2:3]
+// W32: encoding: [0x6a,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, ttmp[14:15], v[2:3]
+// W32: encoding: [0x7a,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, exec, v[2:3]
+// W32: encoding: [0x7e,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, null, v[2:3]
+// W32: encoding: [0x7c,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, -1, v[2:3]
+// W32: encoding: [0xc1,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, 0.5, v[2:3]
+// W32: encoding: [0xf0,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, src_scc, v[2:3]
+// W32: encoding: [0xfd,0x04,0x50,0x7c]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc_lo, 0xaf123456, v[254:255]
+// W32: encoding: [0xff,0xfc,0x51,0x7c,0x56,0x34,0x12,0xaf]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, v[1:2], v[2:3]
+// W64: encoding: [0x01,0x05,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, v[254:255], v[2:3]
+// W64: encoding: [0xfe,0x05,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, s[2:3], v[2:3]
+// W64: encoding: [0x02,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, s[104:105], v[2:3]
+// W64: encoding: [0x68,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, vcc, v[2:3]
+// W64: encoding: [0x6a,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, ttmp[14:15], v[2:3]
+// W64: encoding: [0x7a,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, exec, v[2:3]
+// W64: encoding: [0x7e,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, null, v[2:3]
+// W64: encoding: [0x7c,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, -1, v[2:3]
+// W64: encoding: [0xc1,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, 0.5, v[2:3]
+// W64: encoding: [0xf0,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, src_scc, v[2:3]
+// W64: encoding: [0xfd,0x04,0x50,0x7c]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f64 vcc, 0xaf123456, v[254:255]
+// W64: encoding: [0xff,0xfc,0x51,0x7c,0x56,0x34,0x12,0xaf]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc.s
index c9241ebd161de47..4ae4f74ad21965e 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_cmp_class_f16_e32 vcc_lo, v1, v2
 // W32: encoding: [0x01,0x05,0xfa,0x7c]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16-fake16.s
new file mode 100644
index 000000000000000..13c4f89cb70240c
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16-fake16.s
@@ -0,0 +1,6052 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+
+v_cmp_class_f16_dpp vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0xfa,0x7c,0x7f,0x6f,0x35,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0xfd,0x7c,0xff,0x6f,0x35,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0xfc,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0xfd,0x7c,0xff,0x6f,0x35,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x04,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x04,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x25,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x24,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x25,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x64,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x64,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x64,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x85,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x84,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x85,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x74,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x74,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x74,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x95,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x94,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x95,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x0c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x0c,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x2d,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x2c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x2d,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x6c,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x6c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x6c,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x8d,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x8c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x8d,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x7c,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x7c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x7c,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x9d,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x9c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x9d,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x08,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x08,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x29,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x28,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x29,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x68,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x68,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x68,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x89,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x88,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x89,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x78,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x78,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x78,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x99,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x98,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x99,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x06,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x06,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x27,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x27,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x66,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x66,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x66,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x87,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x86,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x87,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x76,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x76,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x76,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x97,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x96,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x97,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x0a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x0a,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x2b,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x2a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x2b,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x02,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x02,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x02,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x23,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x22,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x23,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x62,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x62,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x62,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x83,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x82,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x83,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x72,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x72,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x72,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x93,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x92,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x93,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x6a,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x6a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x6a,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x8b,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x8a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x8b,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x7a,0x7c,0x7f,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x7a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x7a,0x7c,0x7f,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x9b,0x7c,0xff,0x6f,0x05,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x9a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x9b,0x7c,0xff,0x6f,0x05,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x1a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x1a,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x3b,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x3a,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x3b,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x12,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x12,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x33,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x32,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x33,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x16,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x16,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x37,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x36,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x37,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x18,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x18,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x39,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x38,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x39,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x14,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x14,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x35,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x34,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x35,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x1c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x1c,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x3d,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x3c,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x3d,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x0e,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x0e,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x2f,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x2e,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x2f,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x10,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x10,0x7c,0x7f,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 quad_perm:[3,2,1,0]
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 quad_perm:[0,1,2,3]
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0xe4,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_mirror
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x40,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_half_mirror
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x41,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_shl:1
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x01,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_shl:15
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x0f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_shr:1
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x11,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_shr:15
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_ror:1
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x21,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_ror:15
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x2f,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x50,0x01,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x5f,0x01,0x01]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W32: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x60,0x09,0x13]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W32: encoding: [0xfa,0xfe,0x31,0x7c,0xff,0x6f,0xf5,0x30]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 quad_perm:[3,2,1,0]
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 quad_perm:[0,1,2,3]
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0xe4,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_mirror
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x40,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_half_mirror
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x41,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_shl:1
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x01,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_shl:15
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x0f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_shr:1
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x11,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_shr:15
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x1f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_ror:1
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x21,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_ror:15
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x2f,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x50,0x01,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x5f,0x01,0x01]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// W64: encoding: [0xfa,0x04,0x30,0x7c,0x01,0x60,0x09,0x13]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// W64: encoding: [0xfa,0xfe,0x31,0x7c,0xff,0x6f,0xf5,0x30]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s
index 0c3a38626fa6c09..0c36108cb0cbe1c 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp16.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_cmp_class_f16_dpp vcc_lo, v1, v2 quad_perm:[3,2,1,0]
 // W32: encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8-fake16.s
new file mode 100644
index 000000000000000..87305ec913d1f3e
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8-fake16.s
@@ -0,0 +1,1300 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+
+v_cmp_class_f16_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0xfa,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0xfd,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0xfc,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0xfd,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x04,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x04,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x24,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x24,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x25,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x24,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x24,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x25,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x64,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x64,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x64,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x64,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x64,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x64,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x84,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x84,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x85,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x84,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x84,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x85,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x74,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x74,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x74,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x74,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x74,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x74,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x94,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x94,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x95,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x94,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x94,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x95,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x0c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x0c,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x2d,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x2c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x2d,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x6c,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x6c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x6c,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x8d,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x8c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x8d,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x7c,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x7c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x7c,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x9d,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x9c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x9d,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x08,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x08,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x28,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x28,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x29,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x28,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x28,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x29,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x68,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x68,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x68,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x68,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x68,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x68,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x88,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x88,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x89,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x88,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x88,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x89,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x78,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x78,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x78,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x78,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x78,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x78,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x98,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x98,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x99,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x98,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x98,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x99,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x06,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x06,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x26,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x26,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x27,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x26,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x26,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x27,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x66,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x66,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x66,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x66,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x66,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x66,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x86,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x86,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x87,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x86,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x86,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x87,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x76,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x76,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x76,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x76,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x76,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x76,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x96,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x96,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x97,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x96,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x96,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x97,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x0a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x0a,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x2b,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x2a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x2b,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x02,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x02,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x02,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x02,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x02,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x02,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x22,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x22,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x23,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x22,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x22,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x23,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x62,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x62,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x62,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x62,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x62,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x62,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x82,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x82,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x83,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x82,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x82,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x83,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x72,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x72,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x72,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x72,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x72,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x72,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x92,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x92,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x93,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x92,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x92,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x93,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x6a,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x6a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x6a,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x8b,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x8a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x8b,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x7a,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x7a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x7a,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x9b,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x9a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x9b,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x1a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x1a,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x3b,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x3a,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x3b,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x12,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x12,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x32,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x32,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x33,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x32,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x32,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x33,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x16,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x16,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x36,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x36,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x37,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x36,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x36,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x37,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x18,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x18,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x38,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x38,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x39,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x38,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x38,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x39,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x14,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x14,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x34,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x34,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x35,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x34,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x34,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x35,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x1c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x1c,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x3d,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x3c,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x3d,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x0e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x0e,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x2f,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x2e,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x2f,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc_lo, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x10,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x10,0x7c,0x7f,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: encoding: [0xe9,0x04,0x30,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W32: encoding: [0xea,0x04,0x30,0x7c,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc_lo, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W32: encoding: [0xe9,0xfe,0x31,0x7c,0xff,0x00,0x00,0x00]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: encoding: [0xe9,0x04,0x30,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// W64: encoding: [0xea,0x04,0x30,0x7c,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f32 vcc, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// W64: encoding: [0xe9,0xfe,0x31,0x7c,0xff,0x00,0x00,0x00]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s
index ceecbc660d06cb8..2a4095f99d834f2 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_dpp8.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_cmp_class_f16_dpp vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // W32: encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s
index ebdd8adc0adf99a..e603e7388a684e0 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_err.s
@@ -1,1774 +1,1775 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s
 
 v_cmp_class_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_class_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_class_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:40: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:40: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_class_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
 
 v_cmp_class_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_eq_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_eq_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_eq_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_eq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_eq_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_eq_i16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_eq_i16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_eq_i16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_eq_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_eq_i16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_u16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_eq_u16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_eq_u16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_u16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_u16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_eq_u16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_eq_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_eq_u16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_u16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ge_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ge_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ge_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ge_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ge_i16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ge_i16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ge_i16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ge_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ge_i16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_u16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ge_u16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ge_u16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_u16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_u16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ge_u16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ge_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ge_u16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_u16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_gt_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_gt_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_gt_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_gt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_gt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_i16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_gt_i16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_gt_i16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_i16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_i16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_gt_i16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_gt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_gt_i16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_i16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_u16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_u16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_gt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_gt_u16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_gt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_gt_u16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_gt_u16_e32 vcc, v127, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_u16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_gt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc, vcc_hi, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16_e32 vcc, vcc_lo, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16_e32 vcc_lo, v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_gt_u16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_gt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_gt_u16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_u16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_le_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_le_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_le_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_le_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_le_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_le_i16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_le_i16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_le_i16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_le_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_le_i16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_u16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_le_u16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_le_u16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_u16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_u16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_le_u16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_le_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_le_u16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_u16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lg_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lg_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lg_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lg_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lt_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lt_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lt_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lt_i16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lt_i16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lt_i16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lt_i16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_u16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lt_u16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_lt_u16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_u16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_u16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lt_u16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_lt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_lt_u16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_u16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_i16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ne_i16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
 v_cmp_ne_i16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_i16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_i16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ne_i16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ne_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ne_i16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_i16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_u16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ne_u16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_ne_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
 
-v_cmp_ne_u16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_ne_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc, v127, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:32: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc, vcc_hi, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_u16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_u16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ne_u16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:37: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ne_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
 
 v_cmp_ne_u16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_u16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_neq_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_neq_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_neq_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_neq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_neq_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nge_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nge_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nge_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nge_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_ngt_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_ngt_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_ngt_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_ngt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_ngt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nle_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nle_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nle_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nle_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nle_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nlg_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nlg_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nlg_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nlg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nlg_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nlt_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:35: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
 
 v_cmp_nlt_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nlt_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:38: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_nlt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
 v_cmp_nlt_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
 v_cmp_o_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
 v_cmp_o_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_o_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
+
+v_cmp_o_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
 v_cmp_o_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_u_f16_e32 vcc, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
 v_cmp_u_f16_e32 vcc, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:33: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
+
+v_cmp_u_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
 v_cmp_u_f16_e32 vcc, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_u_f16_e32 vcc, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_u_f16_e32 vcc_lo, v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16_e32 vcc_lo, v127, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v127, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
-v_cmp_class_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:36: error: invalid operand for instruction
 
-v_cmp_eq_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v128, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_i16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:34: error: invalid operand for instruction
 
-v_cmp_eq_i16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16_e32 vcc_lo, vcc_hi, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_u16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_eq_u16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_i16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_i16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_u16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_u16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_i16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_i16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_u16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_u16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_i16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_i16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_u16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_u16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lg_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lg_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_i16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_i16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_u16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_u16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ne_i16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ne_i16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ne_u16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ne_u16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nge_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nge_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ngt_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ngt_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nle_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nle_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nlg_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nlg_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nlt_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nlt_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_o_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_o_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_u_f16_e32 vcc, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_u_f16_e32 vcc_lo, v128, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_class_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16_e32 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16_e32 vcc_lo, vcc_lo, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s
index 65c0a3c874efbd0..e56c46bb55448c5 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopc_t16_promote.s
@@ -1,2368 +1,2369 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefix=W32 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=W64 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefix=W32 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefix=W64 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 %s 2>&1 > /dev/null | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 %s 2>&1 > /dev/null | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s
 
 v_cmp_class_f16 vcc, v1, v255
-// W64: v_cmp_class_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_class_f16_e64 vcc, v1, v255       ; encoding: [0x6a,0x00,0x7d,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc, v127, v255
-// W64: v_cmp_class_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_class_f16_e64 vcc, v127, v255     ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v128, v2
+// W64: v_cmp_class_f16_e64 vcc, v128, v2       ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_class_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc, vcc_hi, v255
-// W64: v_cmp_class_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_class_f16_e64 vcc, vcc_hi, v255   ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f16 vcc, vcc_lo, v255
-// W64: v_cmp_class_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_class_f16_e64 vcc, vcc_lo, v255   ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f16 vcc_lo, v127, v255
-// W32: v_cmp_class_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_class_f16_e64 vcc_lo, v127, v255  ; encoding: [0x6a,0x00,0x7d,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc_lo, v128, v2
+// W32: v_cmp_class_f16_e64 vcc_lo, v128, v2    ; encoding: [0x6a,0x00,0x7d,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_class_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
+
+v_cmp_class_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:17: error: invalid operand for instruction
 
 v_cmp_class_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_class_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_class_f16_e64 vcc_lo, vcc_hi, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_class_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_class_f16_e64 vcc_lo, vcc_lo, v255 ; encoding: [0x6a,0x00,0x7d,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f16 vcc, v1, v255
-// W64: v_cmp_eq_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_eq_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_f16 vcc, v127, v255
-// W64: v_cmp_eq_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_eq_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v128, v2
+// W64: v_cmp_eq_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_f16 vcc, vcc_hi, v255
-// W64: v_cmp_eq_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_eq_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f16 vcc, vcc_lo, v255
-// W64: v_cmp_eq_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_eq_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f16 vcc_lo, v1, v255
-// W32: v_cmp_eq_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_eq_f16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x02,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_f16 vcc_lo, v127, v255
-// W32: v_cmp_eq_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_eq_f16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x02,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v128, v2
+// W32: v_cmp_eq_f16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x02,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x02,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_eq_f16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x02,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i16 vcc, v1, v255
-// W64: v_cmp_eq_i16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_eq_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x32,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_i16 vcc, v127, v255
-// W64: v_cmp_eq_i16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_eq_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x32,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_i16 vcc, v128, v2
+// W64: v_cmp_eq_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x32,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_i16 vcc, vcc_hi, v255
-// W64: v_cmp_eq_i16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_eq_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x32,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i16 vcc, vcc_lo, v255
-// W64: v_cmp_eq_i16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_eq_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x32,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i16 vcc_lo, v1, v255
-// W32: v_cmp_eq_i16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_eq_i16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x32,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_i16 vcc_lo, v127, v255
-// W32: v_cmp_eq_i16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_eq_i16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x32,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_i16 vcc_lo, v128, v2
+// W32: v_cmp_eq_i16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x32,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_i16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_eq_i16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_eq_i16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x32,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_i16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_eq_i16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_eq_i16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x32,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_u16 vcc, v1, v255
-// W64: v_cmp_eq_u16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_eq_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3a,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_u16 vcc, v127, v255
-// W64: v_cmp_eq_u16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_eq_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3a,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_u16 vcc, v128, v2
+// W64: v_cmp_eq_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3a,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_eq_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_u16 vcc, vcc_hi, v255
-// W64: v_cmp_eq_u16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_eq_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3a,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_u16 vcc, vcc_lo, v255
-// W64: v_cmp_eq_u16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_eq_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3a,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_u16 vcc_lo, v1, v255
-// W32: v_cmp_eq_u16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_eq_u16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x3a,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_u16 vcc_lo, v127, v255
-// W32: v_cmp_eq_u16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_eq_u16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x3a,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_u16 vcc_lo, v128, v2
+// W32: v_cmp_eq_u16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x3a,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_eq_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_eq_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_eq_u16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_eq_u16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_eq_u16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x3a,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_eq_u16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_eq_u16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_eq_u16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x3a,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f16 vcc, v1, v255
-// W64: v_cmp_ge_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ge_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_f16 vcc, v127, v255
-// W64: v_cmp_ge_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ge_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v128, v2
+// W64: v_cmp_ge_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_f16 vcc, vcc_hi, v255
-// W64: v_cmp_ge_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ge_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f16 vcc, vcc_lo, v255
-// W64: v_cmp_ge_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ge_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f16 vcc_lo, v1, v255
-// W32: v_cmp_ge_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ge_f16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x06,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_f16 vcc_lo, v127, v255
-// W32: v_cmp_ge_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ge_f16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x06,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v128, v2
+// W32: v_cmp_ge_f16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x06,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x06,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ge_f16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x06,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i16 vcc, v1, v255
-// W64: v_cmp_ge_i16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ge_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x36,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_i16 vcc, v127, v255
-// W64: v_cmp_ge_i16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ge_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x36,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_i16 vcc, v128, v2
+// W64: v_cmp_ge_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x36,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_i16 vcc, vcc_hi, v255
-// W64: v_cmp_ge_i16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ge_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x36,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i16 vcc, vcc_lo, v255
-// W64: v_cmp_ge_i16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ge_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x36,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i16 vcc_lo, v1, v255
-// W32: v_cmp_ge_i16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ge_i16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x36,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_i16 vcc_lo, v127, v255
-// W32: v_cmp_ge_i16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ge_i16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x36,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_i16 vcc_lo, v128, v2
+// W32: v_cmp_ge_i16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x36,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_i16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_ge_i16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ge_i16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x36,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_ge_i16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ge_i16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x36,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_u16 vcc, v1, v255
-// W64: v_cmp_ge_u16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ge_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3e,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_u16 vcc, v127, v255
-// W64: v_cmp_ge_u16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ge_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3e,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_u16 vcc, v128, v2
+// W64: v_cmp_ge_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3e,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ge_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_u16 vcc, vcc_hi, v255
-// W64: v_cmp_ge_u16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ge_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3e,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_u16 vcc, vcc_lo, v255
-// W64: v_cmp_ge_u16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ge_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3e,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_u16 vcc_lo, v1, v255
-// W32: v_cmp_ge_u16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ge_u16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x3e,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_u16 vcc_lo, v127, v255
-// W32: v_cmp_ge_u16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ge_u16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x3e,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_u16 vcc_lo, v128, v2
+// W32: v_cmp_ge_u16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x3e,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ge_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ge_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ge_u16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_ge_u16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ge_u16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x3e,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_u16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_ge_u16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ge_u16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x3e,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f16 vcc, v1, v255
-// W64: v_cmp_gt_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_gt_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_gt_f16 vcc, v127, v255
-// W64: v_cmp_gt_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_gt_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v128, v2
+// W64: v_cmp_gt_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_gt_f16 vcc, vcc_hi, v255
-// W64: v_cmp_gt_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_gt_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f16 vcc, vcc_lo, v255
-// W64: v_cmp_gt_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_gt_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f16 vcc_lo, v1, v255
-// W32: v_cmp_gt_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_gt_f16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x04,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_gt_f16 vcc_lo, v127, v255
-// W32: v_cmp_gt_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_gt_f16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x04,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v128, v2
+// W32: v_cmp_gt_f16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x04,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_gt_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x04,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_gt_f16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x04,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_i16 vcc, v1, v255
-// W64: v_cmp_gt_i16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_gt_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x34,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_gt_i16 vcc, v127, v255
-// W64: v_cmp_gt_i16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_gt_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x34,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_i16 vcc, v128, v2
+// W64: v_cmp_gt_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x34,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_gt_i16 vcc, vcc_hi, v255
-// W64: v_cmp_gt_i16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_gt_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x34,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_i16 vcc, vcc_lo, v255
-// W64: v_cmp_gt_i16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_gt_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x34,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_i16 vcc_lo, v1, v255
-// W32: v_cmp_gt_i16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_gt_i16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x34,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_gt_i16 vcc_lo, v127, v255
-// W32: v_cmp_gt_i16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_gt_i16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x34,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_i16 vcc_lo, v128, v2
+// W32: v_cmp_gt_i16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x34,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_gt_i16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_gt_i16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_gt_i16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x34,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_i16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_gt_i16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_gt_i16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x34,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_u16 vcc, v1, v255
-// W64: v_cmp_gt_u16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_gt_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3c,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_u16 vcc, v127, v255
-// W64: v_cmp_gt_u16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_u16 vcc, vcc_hi, v255
-// W64: v_cmp_gt_u16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_gt_u16 vcc, vcc_lo, v255
-// W64: v_cmp_gt_u16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_gt_u16 vcc, v127, v255
+// W64: v_cmp_gt_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3c,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_gt_u16 vcc_lo, v1, v255
-// W32: v_cmp_gt_u16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_u16 vcc, v128, v2
+// W64: v_cmp_gt_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3c,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_gt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_u16 vcc, vcc_hi, v255
+// W64: v_cmp_gt_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3c,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc, vcc_lo, v255
+// W64: v_cmp_gt_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3c,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v255
+// W32: v_cmp_gt_u16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x3c,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_gt_u16 vcc_lo, v127, v255
-// W32: v_cmp_gt_u16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_gt_u16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x3c,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_u16 vcc_lo, v128, v2
+// W32: v_cmp_gt_u16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x3c,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_gt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_gt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_gt_u16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_gt_u16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_gt_u16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x3c,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_gt_u16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_gt_u16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_gt_u16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x3c,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f16 vcc, v1, v255
-// W64: v_cmp_le_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_le_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_le_f16 vcc, v127, v255
-// W64: v_cmp_le_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_le_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc, v128, v2
+// W64: v_cmp_le_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_le_f16 vcc, vcc_hi, v255
-// W64: v_cmp_le_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_le_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f16 vcc, vcc_lo, v255
-// W64: v_cmp_le_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_le_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f16 vcc_lo, v1, v255
-// W32: v_cmp_le_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_le_f16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x03,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_le_f16 vcc_lo, v127, v255
-// W32: v_cmp_le_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_le_f16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x03,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v128, v2
+// W32: v_cmp_le_f16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x03,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_le_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_le_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_le_f16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x03,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_le_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_le_f16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x03,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i16 vcc, v1, v255
-// W64: v_cmp_le_i16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_le_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_le_i16 vcc, v127, v255
-// W64: v_cmp_le_i16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_le_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x33,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v128, v2
+// W64: v_cmp_le_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x33,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_le_i16 vcc, vcc_hi, v255
-// W64: v_cmp_le_i16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_le_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x33,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i16 vcc, vcc_lo, v255
-// W64: v_cmp_le_i16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_le_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x33,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i16 vcc_lo, v1, v255
-// W32: v_cmp_le_i16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_le_i16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x33,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_le_i16 vcc_lo, v127, v255
-// W32: v_cmp_le_i16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_le_i16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x33,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc_lo, v128, v2
+// W32: v_cmp_le_i16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x33,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_le_i16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_le_i16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_le_i16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x33,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_i16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_le_i16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_le_i16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x33,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_u16 vcc, v1, v255
-// W64: v_cmp_le_u16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_le_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3b,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_le_u16 vcc, v127, v255
-// W64: v_cmp_le_u16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_le_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3b,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_u16 vcc, v128, v2
+// W64: v_cmp_le_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3b,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_le_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_le_u16 vcc, vcc_hi, v255
-// W64: v_cmp_le_u16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_le_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3b,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_u16 vcc, vcc_lo, v255
-// W64: v_cmp_le_u16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_le_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3b,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_u16 vcc_lo, v1, v255
-// W32: v_cmp_le_u16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_le_u16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x3b,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_le_u16 vcc_lo, v127, v255
-// W32: v_cmp_le_u16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_le_u16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x3b,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_u16 vcc_lo, v128, v2
+// W32: v_cmp_le_u16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x3b,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_le_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_le_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_le_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_le_u16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_le_u16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_le_u16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x3b,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_le_u16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_le_u16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_le_u16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x3b,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f16 vcc, v1, v255
-// W64: v_cmp_lg_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lg_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lg_f16 vcc, v127, v255
-// W64: v_cmp_lg_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lg_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v128, v2
+// W64: v_cmp_lg_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lg_f16 vcc, vcc_hi, v255
-// W64: v_cmp_lg_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lg_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f16 vcc, vcc_lo, v255
-// W64: v_cmp_lg_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lg_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f16 vcc_lo, v1, v255
-// W32: v_cmp_lg_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lg_f16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x05,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lg_f16 vcc_lo, v127, v255
-// W32: v_cmp_lg_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lg_f16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x05,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v128, v2
+// W32: v_cmp_lg_f16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x05,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lg_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x05,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lg_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lg_f16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x05,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_f16 vcc, v1, v255
-// W64: v_cmp_lt_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lt_f16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x01,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_f16 vcc, v127, v255
-// W64: v_cmp_lt_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lt_f16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x01,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_f16 vcc, v128, v2
+// W64: v_cmp_lt_f16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x01,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_f16 vcc, vcc_hi, v255
-// W64: v_cmp_lt_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lt_f16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x01,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_f16 vcc, vcc_lo, v255
-// W64: v_cmp_lt_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lt_f16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x01,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_f16 vcc_lo, v1, v255
-// W32: v_cmp_lt_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lt_f16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x01,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_f16 vcc_lo, v127, v255
-// W32: v_cmp_lt_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lt_f16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x01,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_f16 vcc_lo, v128, v2
+// W32: v_cmp_lt_f16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x01,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_lt_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lt_f16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x01,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_lt_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lt_f16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x01,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i16 vcc, v1, v255
-// W64: v_cmp_lt_i16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lt_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x31,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_i16 vcc, v127, v255
-// W64: v_cmp_lt_i16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lt_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x31,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_i16 vcc, v128, v2
+// W64: v_cmp_lt_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x31,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_i16 vcc, vcc_hi, v255
-// W64: v_cmp_lt_i16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lt_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x31,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i16 vcc, vcc_lo, v255
-// W64: v_cmp_lt_i16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lt_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x31,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i16 vcc_lo, v1, v255
-// W32: v_cmp_lt_i16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lt_i16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x31,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_i16 vcc_lo, v127, v255
-// W32: v_cmp_lt_i16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lt_i16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x31,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_i16 vcc_lo, v128, v2
+// W32: v_cmp_lt_i16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x31,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_i16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_lt_i16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lt_i16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x31,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_i16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_lt_i16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lt_i16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x31,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_u16 vcc, v1, v255
-// W64: v_cmp_lt_u16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lt_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x39,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_u16 vcc, v127, v255
-// W64: v_cmp_lt_u16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lt_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x39,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_u16 vcc, v128, v2
+// W64: v_cmp_lt_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x39,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_lt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_u16 vcc, vcc_hi, v255
-// W64: v_cmp_lt_u16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lt_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x39,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_u16 vcc, vcc_lo, v255
-// W64: v_cmp_lt_u16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_lt_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x39,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_u16 vcc_lo, v1, v255
-// W32: v_cmp_lt_u16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lt_u16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x39,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_u16 vcc_lo, v127, v255
-// W32: v_cmp_lt_u16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lt_u16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x39,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_u16 vcc_lo, v128, v2
+// W32: v_cmp_lt_u16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x39,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_lt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_lt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_lt_u16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_lt_u16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lt_u16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x39,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_lt_u16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_lt_u16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_lt_u16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x39,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_i16 vcc, v1, v255
-// W64: v_cmp_ne_i16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ne_i16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x35,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ne_i16 vcc, v127, v255
-// W64: v_cmp_ne_i16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ne_i16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x35,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_i16 vcc, v128, v2
+// W64: v_cmp_ne_i16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x35,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ne_i16 vcc, vcc_hi, v255
-// W64: v_cmp_ne_i16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ne_i16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x35,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_i16 vcc, vcc_lo, v255
-// W64: v_cmp_ne_i16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ne_i16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x35,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_i16 vcc_lo, v1, v255
-// W32: v_cmp_ne_i16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ne_i16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x35,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ne_i16 vcc_lo, v127, v255
-// W32: v_cmp_ne_i16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ne_i16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x35,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_i16 vcc_lo, v128, v2
+// W32: v_cmp_ne_i16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x35,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ne_i16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_ne_i16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ne_i16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x35,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_i16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_ne_i16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ne_i16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x35,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_u16 vcc, v1, v255
-// W64: v_cmp_ne_u16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ne_u16_e64 vcc, v1, v255          ; encoding: [0x6a,0x00,0x3d,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_ne_u16 vcc, v127, v255
-// W64: v_cmp_ne_u16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
-v_cmp_ne_u16 vcc, vcc_hi, v255
-// W64: v_cmp_ne_u16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_u16 vcc, v127, v255
+// W64: v_cmp_ne_u16_e64 vcc, v127, v255        ; encoding: [0x6a,0x00,0x3d,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_u16 vcc, v128, v2
+// W64: v_cmp_ne_u16_e64 vcc, v128, v2          ; encoding: [0x6a,0x00,0x3d,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ne_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_u16 vcc, vcc_hi, v255
+// W64: v_cmp_ne_u16_e64 vcc, vcc_hi, v255      ; encoding: [0x6a,0x00,0x3d,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_u16 vcc, vcc_lo, v255
-// W64: v_cmp_ne_u16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ne_u16_e64 vcc, vcc_lo, v255      ; encoding: [0x6a,0x00,0x3d,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_u16 vcc_lo, v1, v255
-// W32: v_cmp_ne_u16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ne_u16_e64 vcc_lo, v1, v255       ; encoding: [0x6a,0x00,0x3d,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ne_u16 vcc_lo, v127, v255
-// W32: v_cmp_ne_u16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ne_u16_e64 vcc_lo, v127, v255     ; encoding: [0x6a,0x00,0x3d,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_u16 vcc_lo, v128, v2
+// W32: v_cmp_ne_u16_e64 vcc_lo, v128, v2       ; encoding: [0x6a,0x00,0x3d,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ne_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
+
+v_cmp_ne_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:14: error: invalid operand for instruction
 
 v_cmp_ne_u16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_ne_u16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ne_u16_e64 vcc_lo, vcc_hi, v255   ; encoding: [0x6a,0x00,0x3d,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ne_u16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_ne_u16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ne_u16_e64 vcc_lo, vcc_lo, v255   ; encoding: [0x6a,0x00,0x3d,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f16 vcc, v1, v255
-// W64: v_cmp_neq_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_neq_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_neq_f16 vcc, v127, v255
-// W64: v_cmp_neq_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_neq_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_neq_f16 vcc, v128, v2
+// W64: v_cmp_neq_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_neq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_neq_f16 vcc, vcc_hi, v255
-// W64: v_cmp_neq_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_neq_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f16 vcc, vcc_lo, v255
-// W64: v_cmp_neq_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_neq_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f16 vcc_lo, v1, v255
-// W32: v_cmp_neq_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_neq_f16_e64 vcc_lo, v1, v255      ; encoding: [0x6a,0x00,0x0d,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_neq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_neq_f16 vcc_lo, v127, v255
-// W32: v_cmp_neq_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_neq_f16_e64 vcc_lo, v127, v255    ; encoding: [0x6a,0x00,0x0d,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_neq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_neq_f16 vcc_lo, v128, v2
+// W32: v_cmp_neq_f16_e64 vcc_lo, v128, v2      ; encoding: [0x6a,0x00,0x0d,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_neq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_neq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_neq_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_hi, v255  ; encoding: [0x6a,0x00,0x0d,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_neq_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_neq_f16_e64 vcc_lo, vcc_lo, v255  ; encoding: [0x6a,0x00,0x0d,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f16 vcc, v1, v255
-// W64: v_cmp_nge_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nge_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nge_f16 vcc, v127, v255
-// W64: v_cmp_nge_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nge_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nge_f16 vcc, v128, v2
+// W64: v_cmp_nge_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nge_f16 vcc, vcc_hi, v255
-// W64: v_cmp_nge_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nge_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f16 vcc, vcc_lo, v255
-// W64: v_cmp_nge_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nge_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f16 vcc_lo, v1, v255
-// W32: v_cmp_nge_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nge_f16_e64 vcc_lo, v1, v255      ; encoding: [0x6a,0x00,0x09,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nge_f16 vcc_lo, v127, v255
-// W32: v_cmp_nge_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nge_f16_e64 vcc_lo, v127, v255    ; encoding: [0x6a,0x00,0x09,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nge_f16 vcc_lo, v128, v2
+// W32: v_cmp_nge_f16_e64 vcc_lo, v128, v2      ; encoding: [0x6a,0x00,0x09,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nge_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_hi, v255  ; encoding: [0x6a,0x00,0x09,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nge_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nge_f16_e64 vcc_lo, vcc_lo, v255  ; encoding: [0x6a,0x00,0x09,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f16 vcc, v1, v255
-// W64: v_cmp_ngt_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ngt_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_ngt_f16 vcc, v127, v255
-// W64: v_cmp_ngt_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ngt_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc, v128, v2
+// W64: v_cmp_ngt_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_ngt_f16 vcc, vcc_hi, v255
-// W64: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ngt_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f16 vcc, vcc_lo, v255
-// W64: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_ngt_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f16 vcc_lo, v1, v255
-// W32: v_cmp_ngt_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ngt_f16_e64 vcc_lo, v1, v255      ; encoding: [0x6a,0x00,0x0b,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_ngt_f16 vcc_lo, v127, v255
-// W32: v_cmp_ngt_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ngt_f16_e64 vcc_lo, v127, v255    ; encoding: [0x6a,0x00,0x0b,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v128, v2
+// W32: v_cmp_ngt_f16_e64 vcc_lo, v128, v2      ; encoding: [0x6a,0x00,0x0b,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_ngt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_ngt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_ngt_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_hi, v255  ; encoding: [0x6a,0x00,0x0b,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ngt_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_ngt_f16_e64 vcc_lo, vcc_lo, v255  ; encoding: [0x6a,0x00,0x0b,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f16 vcc, v1, v255
-// W64: v_cmp_nle_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nle_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nle_f16 vcc, v127, v255
-// W64: v_cmp_nle_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nle_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v128, v2
+// W64: v_cmp_nle_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nle_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nle_f16 vcc, vcc_hi, v255
-// W64: v_cmp_nle_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nle_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f16 vcc, vcc_lo, v255
-// W64: v_cmp_nle_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nle_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f16 vcc_lo, v1, v255
-// W32: v_cmp_nle_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nle_f16_e64 vcc_lo, v1, v255      ; encoding: [0x6a,0x00,0x0c,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nle_f16 vcc_lo, v127, v255
-// W32: v_cmp_nle_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nle_f16_e64 vcc_lo, v127, v255    ; encoding: [0x6a,0x00,0x0c,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v128, v2
+// W32: v_cmp_nle_f16_e64 vcc_lo, v128, v2      ; encoding: [0x6a,0x00,0x0c,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nle_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nle_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nle_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_hi, v255  ; encoding: [0x6a,0x00,0x0c,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nle_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nle_f16_e64 vcc_lo, vcc_lo, v255  ; encoding: [0x6a,0x00,0x0c,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f16 vcc, v1, v255
-// W64: v_cmp_nlg_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nlg_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nlg_f16 vcc, v127, v255
-// W64: v_cmp_nlg_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nlg_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v128, v2
+// W64: v_cmp_nlg_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nlg_f16 vcc, vcc_hi, v255
-// W64: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nlg_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f16 vcc, vcc_lo, v255
-// W64: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nlg_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f16 vcc_lo, v1, v255
-// W32: v_cmp_nlg_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nlg_f16_e64 vcc_lo, v1, v255      ; encoding: [0x6a,0x00,0x0a,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nlg_f16 vcc_lo, v127, v255
-// W32: v_cmp_nlg_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nlg_f16_e64 vcc_lo, v127, v255    ; encoding: [0x6a,0x00,0x0a,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v128, v2
+// W32: v_cmp_nlg_f16_e64 vcc_lo, v128, v2      ; encoding: [0x6a,0x00,0x0a,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nlg_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_hi, v255  ; encoding: [0x6a,0x00,0x0a,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlg_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nlg_f16_e64 vcc_lo, vcc_lo, v255  ; encoding: [0x6a,0x00,0x0a,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f16 vcc, v1, v255
-// W64: v_cmp_nlt_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nlt_f16_e64 vcc, v1, v255         ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nlt_f16 vcc, v127, v255
-// W64: v_cmp_nlt_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nlt_f16_e64 vcc, v127, v255       ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v128, v2
+// W64: v_cmp_nlt_f16_e64 vcc, v128, v2         ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nlt_f16 vcc, vcc_hi, v255
-// W64: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nlt_f16_e64 vcc, vcc_hi, v255     ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f16 vcc, vcc_lo, v255
-// W64: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_nlt_f16_e64 vcc, vcc_lo, v255     ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f16 vcc_lo, v1, v255
-// W32: v_cmp_nlt_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nlt_f16_e64 vcc_lo, v1, v255      ; encoding: [0x6a,0x00,0x0e,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nlt_f16 vcc_lo, v127, v255
-// W32: v_cmp_nlt_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nlt_f16_e64 vcc_lo, v127, v255    ; encoding: [0x6a,0x00,0x0e,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v128, v2
+// W32: v_cmp_nlt_f16_e64 vcc_lo, v128, v2      ; encoding: [0x6a,0x00,0x0e,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_nlt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
+
+v_cmp_nlt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:15: error: invalid operand for instruction
 
 v_cmp_nlt_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_hi, v255  ; encoding: [0x6a,0x00,0x0e,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_nlt_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_nlt_f16_e64 vcc_lo, vcc_lo, v255  ; encoding: [0x6a,0x00,0x0e,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f16 vcc, v1, v255
-// W64: v_cmp_o_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_o_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
 v_cmp_o_f16 vcc, v127, v255
-// W64: v_cmp_o_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_o_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, v128, v2
+// W64: v_cmp_o_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_o_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
 v_cmp_o_f16 vcc, vcc_hi, v255
-// W64: v_cmp_o_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_o_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f16 vcc, vcc_lo, v255
-// W64: v_cmp_o_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_o_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f16 vcc_lo, v1, v255
-// W32: v_cmp_o_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_o_f16_e64 vcc_lo, v1, v255        ; encoding: [0x6a,0x00,0x07,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
 v_cmp_o_f16 vcc_lo, v127, v255
-// W32: v_cmp_o_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_o_f16_e64 vcc_lo, v127, v255      ; encoding: [0x6a,0x00,0x07,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v128, v2
+// W32: v_cmp_o_f16_e64 vcc_lo, v128, v2        ; encoding: [0x6a,0x00,0x07,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_o_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_o_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
 v_cmp_o_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_o_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_o_f16_e64 vcc_lo, vcc_hi, v255    ; encoding: [0x6a,0x00,0x07,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_o_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_o_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_o_f16_e64 vcc_lo, vcc_lo, v255    ; encoding: [0x6a,0x00,0x07,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_u_f16 vcc, v1, v255
-// W64: v_cmp_u_f16_e64 vcc, v1, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_u_f16_e64 vcc, v1, v255           ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
 v_cmp_u_f16 vcc, v127, v255
-// W64: v_cmp_u_f16_e64 vcc, v127, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_u_f16_e64 vcc, v127, v255         ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, v128, v2
+// W64: v_cmp_u_f16_e64 vcc, v128, v2           ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
+
+v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
+
+v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
+// W64: v_cmp_u_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
 v_cmp_u_f16 vcc, vcc_hi, v255
-// W64: v_cmp_u_f16_e64 vcc, vcc_hi, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_u_f16_e64 vcc, vcc_hi, v255       ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_u_f16 vcc, vcc_lo, v255
-// W64: v_cmp_u_f16_e64 vcc, vcc_lo, v255
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W64: v_cmp_u_f16_e64 vcc, vcc_lo, v255       ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00]
+// W32-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_u_f16 vcc_lo, v1, v255
-// W32: v_cmp_u_f16_e64 vcc_lo, v1, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// W32: v_cmp_u_f16_e64 vcc_lo, v1, v255        ; encoding: [0x6a,0x00,0x08,0xd4,0x01,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_u_f16 vcc_lo, v127, v255
-// W32: v_cmp_u_f16_e64 vcc_lo, v127, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, vcc_hi, v255
-// W32: v_cmp_u_f16_e64 vcc_lo, vcc_hi, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_u_f16 vcc_lo, vcc_lo, v255
-// W32: v_cmp_u_f16_e64 vcc_lo, vcc_lo, v255
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, v127, v255
+// W32: v_cmp_u_f16_e64 vcc_lo, v127, v255      ; encoding: [0x6a,0x00,0x08,0xd4,0x7f,0xff,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_class_f16 vcc, v128, v2
-// W64: v_cmp_class_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0xfe,0x03,0x00,0x7f,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_class_f16 vcc_lo, v128, v2
-// W32: v_cmp_class_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0xfe,0x03,0x00,0x7f,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_eq_f16 vcc, v128, v2
-// W64: v_cmp_eq_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, v128, v2
+// W32: v_cmp_u_f16_e64 vcc_lo, v128, v2        ; encoding: [0x6a,0x00,0x08,0xd4,0x80,0x05,0x02,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_f16 vcc_lo, v128, v2
-// W32: v_cmp_eq_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x80,0x77,0x39,0x05]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_eq_i16 vcc, v128, v2
-// W64: v_cmp_eq_i16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x6a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x80,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:13: error: invalid operand for instruction
 
-v_cmp_eq_i16 vcc_lo, v128, v2
-// W32: v_cmp_eq_i16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmp_u_f16 vcc_lo, vcc_hi, v255
+// W32: v_cmp_u_f16_e64 vcc_lo, vcc_hi, v255    ; encoding: [0x6a,0x00,0x08,0xd4,0x6b,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
 
-v_cmp_eq_u16 vcc, v128, v2
-// W64: v_cmp_eq_u16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_eq_u16 vcc_lo, v128, v2
-// W32: v_cmp_eq_u16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_f16 vcc, v128, v2
-// W64: v_cmp_ge_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_f16 vcc_lo, v128, v2
-// W32: v_cmp_ge_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_i16 vcc, v128, v2
-// W64: v_cmp_ge_i16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_i16 vcc_lo, v128, v2
-// W32: v_cmp_ge_i16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_u16 vcc, v128, v2
-// W64: v_cmp_ge_u16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ge_u16 vcc_lo, v128, v2
-// W32: v_cmp_ge_u16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16 vcc, v128, v2
-// W64: v_cmp_gt_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_f16 vcc_lo, v128, v2
-// W32: v_cmp_gt_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_i16 vcc, v128, v2
-// W64: v_cmp_gt_i16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_i16 vcc_lo, v128, v2
-// W32: v_cmp_gt_i16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_u16 vcc, v128, v2
-// W64: v_cmp_gt_u16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_gt_u16 vcc_lo, v128, v2
-// W32: v_cmp_gt_u16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16 vcc, v128, v2
-// W64: v_cmp_le_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_f16 vcc_lo, v128, v2
-// W32: v_cmp_le_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_i16 vcc, v128, v2
-// W64: v_cmp_le_i16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_i16 vcc_lo, v128, v2
-// W32: v_cmp_le_i16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_u16 vcc, v128, v2
-// W64: v_cmp_le_u16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_le_u16 vcc_lo, v128, v2
-// W32: v_cmp_le_u16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lg_f16 vcc, v128, v2
-// W64: v_cmp_lg_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lg_f16 vcc_lo, v128, v2
-// W32: v_cmp_lg_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_f16 vcc, v128, v2
-// W64: v_cmp_lt_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_f16 vcc_lo, v128, v2
-// W32: v_cmp_lt_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_i16 vcc, v128, v2
-// W64: v_cmp_lt_i16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_i16 vcc_lo, v128, v2
-// W32: v_cmp_lt_i16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_u16 vcc, v128, v2
-// W64: v_cmp_lt_u16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_lt_u16 vcc_lo, v128, v2
-// W32: v_cmp_lt_u16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ne_i16 vcc, v128, v2
-// W64: v_cmp_ne_i16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ne_i16 vcc_lo, v128, v2
-// W32: v_cmp_ne_i16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ne_u16 vcc, v128, v2
-// W64: v_cmp_ne_u16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ne_u16 vcc_lo, v128, v2
-// W32: v_cmp_ne_u16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16 vcc, v128, v2
-// W64: v_cmp_neq_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_neq_f16 vcc_lo, v128, v2
-// W32: v_cmp_neq_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nge_f16 vcc, v128, v2
-// W64: v_cmp_nge_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nge_f16 vcc_lo, v128, v2
-// W32: v_cmp_nge_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ngt_f16 vcc, v128, v2
-// W64: v_cmp_ngt_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_ngt_f16 vcc_lo, v128, v2
-// W32: v_cmp_ngt_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nle_f16 vcc, v128, v2
-// W64: v_cmp_nle_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nle_f16 vcc_lo, v128, v2
-// W32: v_cmp_nle_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nlg_f16 vcc, v128, v2
-// W64: v_cmp_nlg_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nlg_f16 vcc_lo, v128, v2
-// W32: v_cmp_nlg_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nlt_f16 vcc, v128, v2
-// W64: v_cmp_nlt_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_nlt_f16 vcc_lo, v128, v2
-// W32: v_cmp_nlt_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_o_f16 vcc, v128, v2
-// W64: v_cmp_o_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_o_f16 vcc_lo, v128, v2
-// W32: v_cmp_o_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_u_f16 vcc, v128, v2
-// W64: v_cmp_u_f16_e64 vcc, v128, v2
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_u_f16 vcc_lo, v128, v2
-// W32: v_cmp_u_f16_e64 vcc_lo, v128, v2
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_lt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_lt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_lt_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_lt_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_lt_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_lt_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ne_i16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ne_i16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ne_u16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ne_u16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_eq_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ge_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_gt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_le_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_le_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_lt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_lt_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_lt_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ne_i16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ne_u16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lt_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lt_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lt_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lt_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ne_i16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ne_i16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ne_u16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ne_u16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_class_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_class_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_class_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_eq_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_eq_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_eq_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ge_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ge_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ge_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_gt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_gt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_gt_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_le_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_le_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_le_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lg_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lt_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lt_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_lt_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_lt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_lt_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ne_i16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ne_i16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ne_u16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ne_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ne_u16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_neq_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_neq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_neq_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nge_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nge_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_ngt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_ngt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_ngt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nle_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nle_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nle_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlg_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlg_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_nlt_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_nlt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_nlt_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_o_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_o_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_o_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64: v_cmp_u_f16_e64_dpp vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
-
-v_cmp_u_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W32: v_cmp_u_f16_e64_dpp vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
-// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmp_u_f16 vcc_lo, vcc_lo, v255
+// W32: v_cmp_u_f16_e64 vcc_lo, vcc_lo, v255    ; encoding: [0x6a,0x00,0x08,0xd4,0x6a,0xfe,0x03,0x00]
+// W64-ERR: :[[@LINE-2]]:1: error: operands are not valid for this GPU or mode
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx-fake16.s
new file mode 100644
index 000000000000000..a5b673494f134b9
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx-fake16.s
@@ -0,0 +1,3404 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+
+v_cmpx_class_f16_e32 v1, v2
+// GFX12: encoding: [0x01,0x05,0xfa,0x7d]
+
+v_cmpx_class_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0xfa,0x7d]
+
+v_cmpx_class_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0xfa,0x7d]
+
+v_cmpx_class_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0xfa,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_class_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0xfc,0x7d]
+
+v_cmpx_class_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0xfc,0x7d]
+
+v_cmpx_class_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0xfc,0x7d]
+
+v_cmpx_class_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0xfd,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_class_f64 v[1:2], v2
+// GFX12: encoding: [0x01,0x05,0xfe,0x7d]
+
+v_cmpx_class_f64 v[254:255], v2
+// GFX12: encoding: [0xfe,0x05,0xfe,0x7d]
+
+v_cmpx_class_f64 s[2:3], v2
+// GFX12: encoding: [0x02,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 s[104:105], v2
+// GFX12: encoding: [0x68,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 vcc, v2
+// GFX12: encoding: [0x6a,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 ttmp[14:15], v2
+// GFX12: encoding: [0x7a,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 exec, v2
+// GFX12: encoding: [0x7e,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 null, v2
+// GFX12: encoding: [0x7c,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 -1, v2
+// GFX12: encoding: [0xc1,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0xfe,0x7d]
+
+v_cmpx_class_f64 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0xff,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x04,0x7d]
+
+v_cmpx_eq_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x04,0x7d]
+
+v_cmpx_eq_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x04,0x7d]
+
+v_cmpx_eq_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x04,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_eq_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x24,0x7d]
+
+v_cmpx_eq_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x24,0x7d]
+
+v_cmpx_eq_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x24,0x7d]
+
+v_cmpx_eq_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x25,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x44,0x7d]
+
+v_cmpx_eq_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x44,0x7d]
+
+v_cmpx_eq_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x44,0x7d]
+
+v_cmpx_eq_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x45,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_i16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x64,0x7d]
+
+v_cmpx_eq_i16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x64,0x7d]
+
+v_cmpx_eq_i16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x64,0x7d]
+
+v_cmpx_eq_i16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x64,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_eq_i32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x84,0x7d]
+
+v_cmpx_eq_i32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x84,0x7d]
+
+v_cmpx_eq_i32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x84,0x7d]
+
+v_cmpx_eq_i32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x85,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_i64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0xa4,0x7d]
+
+v_cmpx_eq_i64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0xa4,0x7d]
+
+v_cmpx_eq_i64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0xa4,0x7d]
+
+v_cmpx_eq_i64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0xa5,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_u16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x74,0x7d]
+
+v_cmpx_eq_u16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x74,0x7d]
+
+v_cmpx_eq_u16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x74,0x7d]
+
+v_cmpx_eq_u16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x74,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_eq_u32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x94,0x7d]
+
+v_cmpx_eq_u32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x94,0x7d]
+
+v_cmpx_eq_u32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x94,0x7d]
+
+v_cmpx_eq_u32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x95,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_eq_u64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0xb4,0x7d]
+
+v_cmpx_eq_u64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0xb4,0x7d]
+
+v_cmpx_eq_u64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0xb4,0x7d]
+
+v_cmpx_eq_u64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0xb5,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x0c,0x7d]
+
+v_cmpx_ge_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x0c,0x7d]
+
+v_cmpx_ge_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x0c,0x7d]
+
+v_cmpx_ge_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x0c,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ge_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x2c,0x7d]
+
+v_cmpx_ge_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x2c,0x7d]
+
+v_cmpx_ge_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x2c,0x7d]
+
+v_cmpx_ge_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x2d,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x4c,0x7d]
+
+v_cmpx_ge_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x4c,0x7d]
+
+v_cmpx_ge_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x4c,0x7d]
+
+v_cmpx_ge_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x4d,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_i16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x6c,0x7d]
+
+v_cmpx_ge_i16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x6c,0x7d]
+
+v_cmpx_ge_i16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x6c,0x7d]
+
+v_cmpx_ge_i16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x6c,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ge_i32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x8c,0x7d]
+
+v_cmpx_ge_i32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x8c,0x7d]
+
+v_cmpx_ge_i32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x8c,0x7d]
+
+v_cmpx_ge_i32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x8d,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_i64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0xac,0x7d]
+
+v_cmpx_ge_i64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0xac,0x7d]
+
+v_cmpx_ge_i64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0xac,0x7d]
+
+v_cmpx_ge_i64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0xad,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_u16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x7c,0x7d]
+
+v_cmpx_ge_u16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x7c,0x7d]
+
+v_cmpx_ge_u16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x7c,0x7d]
+
+v_cmpx_ge_u16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x7c,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ge_u32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x9c,0x7d]
+
+v_cmpx_ge_u32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x9c,0x7d]
+
+v_cmpx_ge_u32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x9c,0x7d]
+
+v_cmpx_ge_u32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x9d,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ge_u64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0xbc,0x7d]
+
+v_cmpx_ge_u64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0xbc,0x7d]
+
+v_cmpx_ge_u64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0xbc,0x7d]
+
+v_cmpx_ge_u64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0xbd,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x08,0x7d]
+
+v_cmpx_gt_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x08,0x7d]
+
+v_cmpx_gt_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x08,0x7d]
+
+v_cmpx_gt_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x08,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_gt_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x28,0x7d]
+
+v_cmpx_gt_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x28,0x7d]
+
+v_cmpx_gt_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x28,0x7d]
+
+v_cmpx_gt_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x29,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x48,0x7d]
+
+v_cmpx_gt_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x48,0x7d]
+
+v_cmpx_gt_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x48,0x7d]
+
+v_cmpx_gt_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x49,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_i16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x68,0x7d]
+
+v_cmpx_gt_i16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x68,0x7d]
+
+v_cmpx_gt_i16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x68,0x7d]
+
+v_cmpx_gt_i16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x68,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_gt_i32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x88,0x7d]
+
+v_cmpx_gt_i32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x88,0x7d]
+
+v_cmpx_gt_i32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x88,0x7d]
+
+v_cmpx_gt_i32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x89,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_i64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0xa8,0x7d]
+
+v_cmpx_gt_i64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0xa8,0x7d]
+
+v_cmpx_gt_i64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0xa8,0x7d]
+
+v_cmpx_gt_i64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0xa9,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_u16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x78,0x7d]
+
+v_cmpx_gt_u16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x78,0x7d]
+
+v_cmpx_gt_u16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x78,0x7d]
+
+v_cmpx_gt_u16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x78,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_gt_u32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x98,0x7d]
+
+v_cmpx_gt_u32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x98,0x7d]
+
+v_cmpx_gt_u32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x98,0x7d]
+
+v_cmpx_gt_u32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x99,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_gt_u64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0xb8,0x7d]
+
+v_cmpx_gt_u64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0xb8,0x7d]
+
+v_cmpx_gt_u64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0xb8,0x7d]
+
+v_cmpx_gt_u64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0xb9,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x06,0x7d]
+
+v_cmpx_le_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x06,0x7d]
+
+v_cmpx_le_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x06,0x7d]
+
+v_cmpx_le_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x06,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_le_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x26,0x7d]
+
+v_cmpx_le_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x26,0x7d]
+
+v_cmpx_le_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x26,0x7d]
+
+v_cmpx_le_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x27,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x46,0x7d]
+
+v_cmpx_le_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x46,0x7d]
+
+v_cmpx_le_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x46,0x7d]
+
+v_cmpx_le_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x47,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_i16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x66,0x7d]
+
+v_cmpx_le_i16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x66,0x7d]
+
+v_cmpx_le_i16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x66,0x7d]
+
+v_cmpx_le_i16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x66,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_le_i32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x86,0x7d]
+
+v_cmpx_le_i32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x86,0x7d]
+
+v_cmpx_le_i32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x86,0x7d]
+
+v_cmpx_le_i32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x87,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_i64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0xa6,0x7d]
+
+v_cmpx_le_i64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0xa6,0x7d]
+
+v_cmpx_le_i64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0xa6,0x7d]
+
+v_cmpx_le_i64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0xa7,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_u16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x76,0x7d]
+
+v_cmpx_le_u16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x76,0x7d]
+
+v_cmpx_le_u16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x76,0x7d]
+
+v_cmpx_le_u16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x76,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_le_u32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x96,0x7d]
+
+v_cmpx_le_u32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x96,0x7d]
+
+v_cmpx_le_u32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x96,0x7d]
+
+v_cmpx_le_u32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x97,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_le_u64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0xb6,0x7d]
+
+v_cmpx_le_u64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0xb6,0x7d]
+
+v_cmpx_le_u64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0xb6,0x7d]
+
+v_cmpx_le_u64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0xb7,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lg_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x0a,0x7d]
+
+v_cmpx_lg_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x0a,0x7d]
+
+v_cmpx_lg_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x0a,0x7d]
+
+v_cmpx_lg_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x0a,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lg_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x2a,0x7d]
+
+v_cmpx_lg_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x2a,0x7d]
+
+v_cmpx_lg_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x2a,0x7d]
+
+v_cmpx_lg_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x2b,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lg_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x4a,0x7d]
+
+v_cmpx_lg_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x4a,0x7d]
+
+v_cmpx_lg_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x4a,0x7d]
+
+v_cmpx_lg_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x4b,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x02,0x7d]
+
+v_cmpx_lt_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x02,0x7d]
+
+v_cmpx_lt_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x02,0x7d]
+
+v_cmpx_lt_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x02,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lt_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x22,0x7d]
+
+v_cmpx_lt_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x22,0x7d]
+
+v_cmpx_lt_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x22,0x7d]
+
+v_cmpx_lt_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x23,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x42,0x7d]
+
+v_cmpx_lt_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x42,0x7d]
+
+v_cmpx_lt_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x42,0x7d]
+
+v_cmpx_lt_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x43,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_i16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x62,0x7d]
+
+v_cmpx_lt_i16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x62,0x7d]
+
+v_cmpx_lt_i16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x62,0x7d]
+
+v_cmpx_lt_i16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x62,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lt_i32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x82,0x7d]
+
+v_cmpx_lt_i32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x82,0x7d]
+
+v_cmpx_lt_i32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x82,0x7d]
+
+v_cmpx_lt_i32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x83,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_i64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0xa2,0x7d]
+
+v_cmpx_lt_i64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0xa2,0x7d]
+
+v_cmpx_lt_i64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0xa2,0x7d]
+
+v_cmpx_lt_i64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0xa3,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_u16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x72,0x7d]
+
+v_cmpx_lt_u16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x72,0x7d]
+
+v_cmpx_lt_u16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x72,0x7d]
+
+v_cmpx_lt_u16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x72,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_lt_u32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x92,0x7d]
+
+v_cmpx_lt_u32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x92,0x7d]
+
+v_cmpx_lt_u32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x92,0x7d]
+
+v_cmpx_lt_u32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x93,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_lt_u64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0xb2,0x7d]
+
+v_cmpx_lt_u64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0xb2,0x7d]
+
+v_cmpx_lt_u64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0xb2,0x7d]
+
+v_cmpx_lt_u64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0xb3,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_i16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x6a,0x7d]
+
+v_cmpx_ne_i16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x6a,0x7d]
+
+v_cmpx_ne_i16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x6a,0x7d]
+
+v_cmpx_ne_i16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x6a,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ne_i32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x8a,0x7d]
+
+v_cmpx_ne_i32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x8a,0x7d]
+
+v_cmpx_ne_i32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x8a,0x7d]
+
+v_cmpx_ne_i32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x8b,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_i64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0xaa,0x7d]
+
+v_cmpx_ne_i64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0xaa,0x7d]
+
+v_cmpx_ne_i64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0xaa,0x7d]
+
+v_cmpx_ne_i64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0xab,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_u16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x7a,0x7d]
+
+v_cmpx_ne_u16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x7a,0x7d]
+
+v_cmpx_ne_u16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x7a,0x7d]
+
+v_cmpx_ne_u16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x7a,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ne_u32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x9a,0x7d]
+
+v_cmpx_ne_u32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x9a,0x7d]
+
+v_cmpx_ne_u32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x9a,0x7d]
+
+v_cmpx_ne_u32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x9b,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ne_u64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0xba,0x7d]
+
+v_cmpx_ne_u64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0xba,0x7d]
+
+v_cmpx_ne_u64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0xba,0x7d]
+
+v_cmpx_ne_u64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0xbb,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_neq_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x1a,0x7d]
+
+v_cmpx_neq_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x1a,0x7d]
+
+v_cmpx_neq_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x1a,0x7d]
+
+v_cmpx_neq_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x1a,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_neq_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x3a,0x7d]
+
+v_cmpx_neq_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x3a,0x7d]
+
+v_cmpx_neq_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x3a,0x7d]
+
+v_cmpx_neq_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x3b,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_neq_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x5a,0x7d]
+
+v_cmpx_neq_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x5a,0x7d]
+
+v_cmpx_neq_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x5a,0x7d]
+
+v_cmpx_neq_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x5b,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nge_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x12,0x7d]
+
+v_cmpx_nge_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x12,0x7d]
+
+v_cmpx_nge_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x12,0x7d]
+
+v_cmpx_nge_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x12,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nge_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x32,0x7d]
+
+v_cmpx_nge_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x32,0x7d]
+
+v_cmpx_nge_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x32,0x7d]
+
+v_cmpx_nge_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x33,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nge_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x52,0x7d]
+
+v_cmpx_nge_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x52,0x7d]
+
+v_cmpx_nge_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x52,0x7d]
+
+v_cmpx_nge_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x53,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ngt_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x16,0x7d]
+
+v_cmpx_ngt_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x16,0x7d]
+
+v_cmpx_ngt_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x16,0x7d]
+
+v_cmpx_ngt_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x16,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_ngt_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x36,0x7d]
+
+v_cmpx_ngt_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x36,0x7d]
+
+v_cmpx_ngt_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x36,0x7d]
+
+v_cmpx_ngt_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x37,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_ngt_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x56,0x7d]
+
+v_cmpx_ngt_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x56,0x7d]
+
+v_cmpx_ngt_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x56,0x7d]
+
+v_cmpx_ngt_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x57,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nle_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x18,0x7d]
+
+v_cmpx_nle_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x18,0x7d]
+
+v_cmpx_nle_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x18,0x7d]
+
+v_cmpx_nle_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x18,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nle_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x38,0x7d]
+
+v_cmpx_nle_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x38,0x7d]
+
+v_cmpx_nle_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x38,0x7d]
+
+v_cmpx_nle_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x39,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nle_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x58,0x7d]
+
+v_cmpx_nle_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x58,0x7d]
+
+v_cmpx_nle_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x58,0x7d]
+
+v_cmpx_nle_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x59,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlg_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x14,0x7d]
+
+v_cmpx_nlg_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x14,0x7d]
+
+v_cmpx_nlg_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x14,0x7d]
+
+v_cmpx_nlg_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x14,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nlg_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x34,0x7d]
+
+v_cmpx_nlg_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x34,0x7d]
+
+v_cmpx_nlg_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x34,0x7d]
+
+v_cmpx_nlg_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x35,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlg_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x54,0x7d]
+
+v_cmpx_nlg_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x54,0x7d]
+
+v_cmpx_nlg_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x54,0x7d]
+
+v_cmpx_nlg_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x55,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlt_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x1c,0x7d]
+
+v_cmpx_nlt_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x1c,0x7d]
+
+v_cmpx_nlt_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x1c,0x7d]
+
+v_cmpx_nlt_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x1c,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_nlt_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x3c,0x7d]
+
+v_cmpx_nlt_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x3c,0x7d]
+
+v_cmpx_nlt_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x3c,0x7d]
+
+v_cmpx_nlt_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x3d,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_nlt_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x5c,0x7d]
+
+v_cmpx_nlt_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x5c,0x7d]
+
+v_cmpx_nlt_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x5c,0x7d]
+
+v_cmpx_nlt_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x5d,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_o_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x0e,0x7d]
+
+v_cmpx_o_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x0e,0x7d]
+
+v_cmpx_o_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x0e,0x7d]
+
+v_cmpx_o_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x0e,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_o_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x2e,0x7d]
+
+v_cmpx_o_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x2e,0x7d]
+
+v_cmpx_o_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x2e,0x7d]
+
+v_cmpx_o_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x2f,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_o_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x4e,0x7d]
+
+v_cmpx_o_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x4e,0x7d]
+
+v_cmpx_o_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x4e,0x7d]
+
+v_cmpx_o_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x4f,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_u_f16 v1, v2
+// GFX12: encoding: [0x01,0x05,0x10,0x7d]
+
+v_cmpx_u_f16 v127, v2
+// GFX12: encoding: [0x7f,0x05,0x10,0x7d]
+
+v_cmpx_u_f16 s1, v2
+// GFX12: encoding: [0x01,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 s105, v2
+// GFX12: encoding: [0x69,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 null, v2
+// GFX12: encoding: [0x7c,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x10,0x7d]
+
+v_cmpx_u_f16 0xfe0b, v127
+// GFX12: encoding: [0xff,0xfe,0x10,0x7d,0x0b,0xfe,0x00,0x00]
+
+v_cmpx_u_f32 v1, v2
+// GFX12: encoding: [0x01,0x05,0x30,0x7d]
+
+v_cmpx_u_f32 v255, v2
+// GFX12: encoding: [0xff,0x05,0x30,0x7d]
+
+v_cmpx_u_f32 s1, v2
+// GFX12: encoding: [0x01,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 s105, v2
+// GFX12: encoding: [0x69,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 vcc_lo, v2
+// GFX12: encoding: [0x6a,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 vcc_hi, v2
+// GFX12: encoding: [0x6b,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 ttmp15, v2
+// GFX12: encoding: [0x7b,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 m0, v2
+// GFX12: encoding: [0x7d,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 exec_lo, v2
+// GFX12: encoding: [0x7e,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 exec_hi, v2
+// GFX12: encoding: [0x7f,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 null, v2
+// GFX12: encoding: [0x7c,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 -1, v2
+// GFX12: encoding: [0xc1,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 0.5, v2
+// GFX12: encoding: [0xf0,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 src_scc, v2
+// GFX12: encoding: [0xfd,0x04,0x30,0x7d]
+
+v_cmpx_u_f32 0xaf123456, v255
+// GFX12: encoding: [0xff,0xfe,0x31,0x7d,0x56,0x34,0x12,0xaf]
+
+v_cmpx_u_f64 v[1:2], v[2:3]
+// GFX12: encoding: [0x01,0x05,0x50,0x7d]
+
+v_cmpx_u_f64 v[254:255], v[2:3]
+// GFX12: encoding: [0xfe,0x05,0x50,0x7d]
+
+v_cmpx_u_f64 s[2:3], v[2:3]
+// GFX12: encoding: [0x02,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 s[104:105], v[2:3]
+// GFX12: encoding: [0x68,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 vcc, v[2:3]
+// GFX12: encoding: [0x6a,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 ttmp[14:15], v[2:3]
+// GFX12: encoding: [0x7a,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 exec, v[2:3]
+// GFX12: encoding: [0x7e,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 null, v[2:3]
+// GFX12: encoding: [0x7c,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 -1, v[2:3]
+// GFX12: encoding: [0xc1,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 0.5, v[2:3]
+// GFX12: encoding: [0xf0,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 src_scc, v[2:3]
+// GFX12: encoding: [0xfd,0x04,0x50,0x7d]
+
+v_cmpx_u_f64 0xaf123456, v[254:255]
+// GFX12: encoding: [0xff,0xfc,0x51,0x7d,0x56,0x34,0x12,0xaf]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s
index 4c5a8e638e3fbe5..8c01cf4fbce20e2 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
 
 v_cmpx_class_f16_e32 v1, v2
 // GFX12: encoding: [0x01,0x05,0xfa,0x7d]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16-fake16.s
new file mode 100644
index 000000000000000..d4e8069f87984ca
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16-fake16.s
@@ -0,0 +1,2270 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+
+v_cmpx_class_f16_dpp v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_class_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_class_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_class_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_class_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_class_f16 -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0xfa,0x7d,0x7f,0x6f,0x35,0x30]
+
+v_cmpx_class_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_class_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_class_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_class_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_class_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0xfc,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_class_f32 -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0xfd,0x7d,0xff,0x6f,0x35,0x30]
+
+v_cmpx_eq_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x04,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x04,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_eq_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x24,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x25,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_eq_i16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_i16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x64,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x64,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_eq_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x84,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x85,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_eq_u16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_u16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x74,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x74,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_eq_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_eq_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_eq_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_eq_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x94,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_eq_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x95,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ge_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x0c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x0c,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_ge_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x2c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x2d,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_ge_i16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_i16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x6c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x6c,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_ge_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x8c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x8d,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ge_u16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_u16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x7c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x7c,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_ge_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ge_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ge_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ge_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x9c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ge_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x9d,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_gt_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x08,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x08,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_gt_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x28,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x29,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_gt_i16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_i16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x68,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x68,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_gt_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x88,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x89,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_gt_u16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_u16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x78,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x78,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_gt_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_gt_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_gt_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_gt_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x98,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_gt_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x99,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_le_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x06,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x06,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_le_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x26,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x27,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_le_i16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_i16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_i16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x66,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x66,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_le_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_i32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x86,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x87,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_le_u16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_u16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_u16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x76,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x76,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_le_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_le_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_le_u32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_le_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_le_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x96,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_le_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x97,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lg_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lg_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lg_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lg_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x0a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lg_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x0a,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_lg_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lg_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lg_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lg_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x2a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x2b,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_lt_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x02,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x02,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_lt_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x22,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x23,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_lt_i16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_i16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x62,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x62,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_lt_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x82,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x83,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_lt_u16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_u16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x72,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x72,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_lt_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_lt_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_lt_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_lt_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x92,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_lt_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x93,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ne_i16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_i16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ne_i16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ne_i16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x6a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ne_i16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x6a,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_ne_i32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_i32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ne_i32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ne_i32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x8a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ne_i32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x8b,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_ne_u16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_u16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ne_u16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ne_u16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x7a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ne_u16 v127, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x7a,0x7d,0x7f,0x6f,0x05,0x30]
+
+v_cmpx_ne_u32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ne_u32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ne_u32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ne_u32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x9a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ne_u32 v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x9b,0x7d,0xff,0x6f,0x05,0x30]
+
+v_cmpx_neq_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_neq_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_neq_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_neq_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x1a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_neq_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x1a,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_neq_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_neq_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_neq_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_neq_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x3a,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_neq_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x3b,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_nge_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nge_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nge_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nge_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x12,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nge_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x12,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_nge_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nge_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nge_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nge_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x32,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nge_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x33,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_ngt_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ngt_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ngt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ngt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x16,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ngt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x16,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_ngt_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_ngt_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_ngt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_ngt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x36,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_ngt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x37,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_nle_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nle_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nle_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nle_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x18,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nle_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x18,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_nle_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nle_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nle_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nle_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x38,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nle_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x39,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_nlg_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlg_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nlg_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nlg_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x14,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nlg_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x14,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_nlg_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlg_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nlg_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nlg_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x34,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nlg_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x35,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_nlt_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlt_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nlt_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nlt_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x1c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nlt_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x1c,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_nlt_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_nlt_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_nlt_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_nlt_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x3c,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_nlt_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x3d,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_o_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_o_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_o_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_o_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_o_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x0e,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_o_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x0e,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_o_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_o_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_o_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_o_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_o_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x2e,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_o_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x2f,0x7d,0xff,0x6f,0xf5,0x30]
+
+v_cmpx_u_f16 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_u_f16 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_u_f16 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_u_f16 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_u_f16 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x10,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_u_f16 -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x10,0x7d,0x7f,0x6f,0xf5,0x30]
+
+v_cmpx_u_f32 v1, v2 quad_perm:[3,2,1,0]
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x1b,0x00,0xff]
+
+v_cmpx_u_f32 v1, v2 quad_perm:[0,1,2,3]
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0xe4,0x00,0xff]
+
+v_cmpx_u_f32 v1, v2 row_mirror
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x40,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_half_mirror
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x41,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_shl:1
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x01,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_shl:15
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x0f,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_shr:1
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x11,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_shr:15
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x1f,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_ror:1
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x21,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_ror:15
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x2f,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x50,0x01,0xff]
+
+v_cmpx_u_f32 v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x5f,0x01,0x01]
+
+v_cmpx_u_f32 v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: encoding: [0xfa,0x04,0x30,0x7d,0x01,0x60,0x09,0x13]
+
+v_cmpx_u_f32 -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: encoding: [0xfa,0xfe,0x31,0x7d,0xff,0x6f,0xf5,0x30]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s
index 9c29f5bcd714b1f..2dc2ecfbe9ba73f 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp16.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
 
 v_cmpx_class_f16_dpp v1, v2 quad_perm:[3,2,1,0]
 // GFX12: encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8-fake16.s
new file mode 100644
index 000000000000000..067a1e2a65d20d8
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8-fake16.s
@@ -0,0 +1,488 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+
+v_cmpx_class_f16_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0xfa,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_class_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0xfc,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0xfc,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_class_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0xfd,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_eq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x04,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x04,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x04,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_eq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x24,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x24,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x25,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_eq_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x64,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x64,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x64,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_eq_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x84,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x84,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x85,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_eq_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x74,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x74,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x74,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_eq_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x94,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x94,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_eq_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x95,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_ge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x0c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x0c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x0c,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_ge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x2c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x2c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x2d,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_ge_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x6c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x6c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x6c,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_ge_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x8c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x8c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x8d,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_ge_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x7c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x7c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x7c,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_ge_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x9c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x9c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ge_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x9d,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_gt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x08,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x08,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x08,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_gt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x28,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x28,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x29,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_gt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x68,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x68,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x68,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_gt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x88,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x88,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x89,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_gt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x78,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x78,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x78,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_gt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x98,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x98,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_gt_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x99,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_le_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x06,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x06,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x06,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_le_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x26,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x26,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x27,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_le_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x66,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x66,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x66,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_le_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x86,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x86,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x87,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_le_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x76,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x76,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x76,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_le_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x96,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x96,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_le_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x97,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_lg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x0a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x0a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x0a,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_lg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x2a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x2a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x2b,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x02,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x02,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x02,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x22,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x22,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x23,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_lt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x62,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x62,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x62,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_lt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x82,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x82,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x83,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_lt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x72,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x72,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x72,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_lt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x92,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x92,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_lt_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x93,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_ne_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x6a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x6a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x6a,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_ne_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x8a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x8a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_i32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x8b,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_ne_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x7a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x7a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x7a,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_ne_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x9a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x9a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ne_u32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x9b,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_neq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x1a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x1a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x1a,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_neq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x3a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x3a,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_neq_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x3b,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_nge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x12,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x12,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x12,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_nge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x32,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x32,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nge_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x33,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_ngt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x16,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x16,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x16,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_ngt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x36,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x36,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_ngt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x37,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_nle_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x18,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x18,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x18,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_nle_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x38,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x38,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nle_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x39,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_nlg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x14,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x14,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x14,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_nlg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x34,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x34,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlg_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x35,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_nlt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x1c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x1c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x1c,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_nlt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x3c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x3c,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_nlt_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x3d,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_o_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x0e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x0e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x0e,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_o_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x2e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x2e,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_o_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x2f,0x7d,0xff,0x00,0x00,0x00]
+
+v_cmpx_u_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x10,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x10,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x10,0x7d,0x7f,0x00,0x00,0x00]
+
+v_cmpx_u_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: encoding: [0xe9,0x04,0x30,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f32 v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: encoding: [0xea,0x04,0x30,0x7d,0x01,0x77,0x39,0x05]
+
+v_cmpx_u_f32 v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: encoding: [0xe9,0xfe,0x31,0x7d,0xff,0x00,0x00,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s
index 0f82932a9e34bb0..a679d693a595f25 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_dpp8.s
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s
 
 v_cmpx_class_f16_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0]
 // GFX12: encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s
index 8d6bff1521010f9..5019324d174b873 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_err.s
@@ -1,487 +1,488 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s
 
 v_cmpx_class_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_eq_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmpx_eq_i16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
-
-v_cmpx_eq_u16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_class_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmpx_ge_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_class_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmpx_ge_i16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_class_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ge_u16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_class_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmpx_gt_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_class_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:31: error: invalid operand for instruction
 
-v_cmpx_gt_i16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_gt_u16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_i16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_le_u16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lg_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_i16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_lt_i16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_u16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_i16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ne_i16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_i16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ne_u16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_neq_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_i16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nge_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_u16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ngt_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nle_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_u16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nlg_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_u16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_nlt_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_o_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_eq_u16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_u_f16_e32 v1, v255
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_class_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_i16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_eq_u16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_i16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_i16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ge_u16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_i16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_i16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_i16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_gt_u16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_i16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_i16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_u16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_le_u16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lg_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_u16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_u16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_lt_i16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_u16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_ge_u16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ne_i16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ne_u16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_neq_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nge_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ngt_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nle_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nlg_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_i16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_nlt_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_o_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_i16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_u_f16_e32 v255, v2
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode
+v_cmpx_gt_i16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_class_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_i16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_i16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_u16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_eq_u16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_u16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_i16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_u16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ge_u16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_gt_u16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_i16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_gt_u16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
 v_cmpx_le_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_i16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_le_u16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lg_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_i16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_lt_i16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_u16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_i16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ne_i16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_i16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ne_u16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_neq_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_i16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nge_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_u16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ngt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nle_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_u16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nlg_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_u16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_nlt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_o_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_le_u16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_u_f16_e32 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lg_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_class_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lg_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_i16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lg_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_eq_u16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lg_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_i16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ge_u16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_i16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_gt_u16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_le_i16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_i16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_le_u16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lg_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_i16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_i16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_lt_i16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_lt_u16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_i16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ne_i16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_u16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ne_u16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_neq_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_u16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nge_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_u16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ngt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nle_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_lt_u16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_nlg_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_i16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_nlt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_o_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_i16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_u_f16_e32 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_i16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_class_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_i16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_eq_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_u16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_eq_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_u16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_ge_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_u16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ge_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ne_u16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:28: error: invalid operand for instruction
 
-v_cmpx_gt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_neq_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_gt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_neq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_le_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_neq_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_le_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_neq_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_le_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_neq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_lg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_neq_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_lt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nge_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_lt_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_lt_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nge_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_ne_i16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nge_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ne_u16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_neq_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nge_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_nge_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ngt_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmpx_ngt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_nle_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ngt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_nlg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ngt_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_nlt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ngt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_o_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_ngt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_u_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nle_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_class_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nle_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_eq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nle_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_eq_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nle_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_eq_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nle_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_ge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nle_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_ge_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlg_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ge_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlg_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_gt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlg_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_gt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlg_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_gt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_le_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlg_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_le_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlt_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_le_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlt_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_lg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlt_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_lt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlt_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_lt_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_lt_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_nlt_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:29: error: invalid operand for instruction
 
-v_cmpx_ne_i16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_o_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ne_u16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_o_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_neq_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_o_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_nge_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_o_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_ngt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_o_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_nle_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_o_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_nlg_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_u_f16_e32 v1, v255
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
-v_cmpx_nlt_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_u_f16_e32 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
 
-v_cmpx_o_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+v_cmpx_u_f16_e32 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmpx_u_f16_e32 v255, v2
+// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmpx_u_f16_e32 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
+
+v_cmpx_u_f16_e32 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s
index 9d7b60b917306df..4f462861e3a0b05 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vopcx_t16_promote.s
@@ -1,487 +1,488 @@
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 %s
+; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 %s
 
 v_cmpx_class_f16 v1, v255
-// GFX12: v_cmpx_class_f16_e64
+// GFX12: v_cmpx_class_f16_e64 v1, v255           ; encoding: [0x7e,0x00,0xfd,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_eq_f16 v1, v255
-// GFX12: v_cmpx_eq_f16_e64
-
-v_cmpx_eq_i16 v1, v255
-// GFX12: v_cmpx_eq_i16_e64
-
-v_cmpx_eq_u16 v1, v255
-// GFX12: v_cmpx_eq_u16_e64
+v_cmpx_class_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_ge_f16 v1, v255
-// GFX12: v_cmpx_ge_f16_e64
+v_cmpx_class_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_class_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_ge_i16 v1, v255
-// GFX12: v_cmpx_ge_i16_e64
+v_cmpx_class_f16 v255, v2
+// GFX12: v_cmpx_class_f16_e64 v255, v2           ; encoding: [0x7e,0x00,0xfd,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ge_u16 v1, v255
-// GFX12: v_cmpx_ge_u16_e64
+v_cmpx_class_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_class_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_gt_f16 v1, v255
-// GFX12: v_cmpx_gt_f16_e64
+v_cmpx_class_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_class_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_gt_i16 v1, v255
-// GFX12: v_cmpx_gt_i16_e64
+v_cmpx_eq_f16 v1, v255
+// GFX12: v_cmpx_eq_f16_e64 v1, v255              ; encoding: [0x7e,0x00,0x82,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_gt_u16 v1, v255
-// GFX12: v_cmpx_gt_u16_e64
+v_cmpx_eq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x82,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_le_f16 v1, v255
-// GFX12: v_cmpx_le_f16_e64
+v_cmpx_eq_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_le_i16 v1, v255
-// GFX12: v_cmpx_le_i16_e64
+v_cmpx_eq_f16 v255, v2
+// GFX12: v_cmpx_eq_f16_e64 v255, v2              ; encoding: [0x7e,0x00,0x82,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_le_u16 v1, v255
-// GFX12: v_cmpx_le_u16_e64
+v_cmpx_eq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x82,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_lg_f16 v1, v255
-// GFX12: v_cmpx_lg_f16_e64
+v_cmpx_eq_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_lt_f16 v1, v255
-// GFX12: v_cmpx_lt_f16_e64
+v_cmpx_eq_i16 v1, v255
+// GFX12: v_cmpx_eq_i16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb2,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_lt_i16 v1, v255
-// GFX12: v_cmpx_lt_i16_e64
+v_cmpx_eq_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lt_u16 v1, v255
-// GFX12: v_cmpx_lt_u16_e64
+v_cmpx_eq_i16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_ne_i16 v1, v255
-// GFX12: v_cmpx_ne_i16_e64
+v_cmpx_eq_i16 v255, v2
+// GFX12: v_cmpx_eq_i16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb2,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ne_u16 v1, v255
-// GFX12: v_cmpx_ne_u16_e64
+v_cmpx_eq_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_neq_f16 v1, v255
-// GFX12: v_cmpx_neq_f16_e64
+v_cmpx_eq_i16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_nge_f16 v1, v255
-// GFX12: v_cmpx_nge_f16_e64
+v_cmpx_eq_u16 v1, v255
+// GFX12: v_cmpx_eq_u16_e64 v1, v255              ; encoding: [0x7e,0x00,0xba,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ngt_f16 v1, v255
-// GFX12: v_cmpx_ngt_f16_e64
+v_cmpx_eq_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_nle_f16 v1, v255
-// GFX12: v_cmpx_nle_f16_e64
+v_cmpx_eq_u16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_nlg_f16 v1, v255
-// GFX12: v_cmpx_nlg_f16_e64
+v_cmpx_eq_u16 v255, v2
+// GFX12: v_cmpx_eq_u16_e64 v255, v2              ; encoding: [0x7e,0x00,0xba,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_nlt_f16 v1, v255
-// GFX12: v_cmpx_nlt_f16_e64
+v_cmpx_eq_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_eq_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_o_f16 v1, v255
-// GFX12: v_cmpx_o_f16_e64
+v_cmpx_eq_u16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_eq_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_u_f16 v1, v255
-// GFX12: v_cmpx_u_f16_e64
+v_cmpx_ge_f16 v1, v255
+// GFX12: v_cmpx_ge_f16_e64 v1, v255              ; encoding: [0x7e,0x00,0x86,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_class_f16 v255, v2
-// GFX12: v_cmpx_class_f16_e64
+v_cmpx_ge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_eq_f16 v255, v2
-// GFX12: v_cmpx_eq_f16_e64
+v_cmpx_ge_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_eq_i16 v255, v2
-// GFX12: v_cmpx_eq_i16_e64
+v_cmpx_ge_f16 v255, v2
+// GFX12: v_cmpx_ge_f16_e64 v255, v2              ; encoding: [0x7e,0x00,0x86,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_eq_u16 v255, v2
-// GFX12: v_cmpx_eq_u16_e64
+v_cmpx_ge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_ge_f16 v255, v2
-// GFX12: v_cmpx_ge_f16_e64
+v_cmpx_ge_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ge_i16 v255, v2
-// GFX12: v_cmpx_ge_i16_e64
+v_cmpx_ge_i16 v1, v255
+// GFX12: v_cmpx_ge_i16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb6,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ge_u16 v255, v2
-// GFX12: v_cmpx_ge_u16_e64
+v_cmpx_ge_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_gt_f16 v255, v2
-// GFX12: v_cmpx_gt_f16_e64
+v_cmpx_ge_i16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_gt_i16 v255, v2
-// GFX12: v_cmpx_gt_i16_e64
+v_cmpx_ge_i16 v255, v2
+// GFX12: v_cmpx_ge_i16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb6,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_gt_u16 v255, v2
-// GFX12: v_cmpx_gt_u16_e64
+v_cmpx_ge_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_le_f16 v255, v2
-// GFX12: v_cmpx_le_f16_e64
+v_cmpx_ge_i16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_le_i16 v255, v2
-// GFX12: v_cmpx_le_i16_e64
+v_cmpx_ge_u16 v1, v255
+// GFX12: v_cmpx_ge_u16_e64 v1, v255              ; encoding: [0x7e,0x00,0xbe,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_le_u16 v255, v2
-// GFX12: v_cmpx_le_u16_e64
+v_cmpx_ge_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lg_f16 v255, v2
-// GFX12: v_cmpx_lg_f16_e64
+v_cmpx_ge_u16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_lt_f16 v255, v2
-// GFX12: v_cmpx_lt_f16_e64
+v_cmpx_ge_u16 v255, v2
+// GFX12: v_cmpx_ge_u16_e64 v255, v2              ; encoding: [0x7e,0x00,0xbe,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_lt_i16 v255, v2
-// GFX12: v_cmpx_lt_i16_e64
+v_cmpx_ge_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ge_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_lt_u16 v255, v2
-// GFX12: v_cmpx_lt_u16_e64
+v_cmpx_ge_u16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ge_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ne_i16 v255, v2
-// GFX12: v_cmpx_ne_i16_e64
+v_cmpx_gt_f16 v1, v255
+// GFX12: v_cmpx_gt_f16_e64 v1, v255              ; encoding: [0x7e,0x00,0x84,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ne_u16 v255, v2
-// GFX12: v_cmpx_ne_u16_e64
+v_cmpx_gt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x84,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_neq_f16 v255, v2
-// GFX12: v_cmpx_neq_f16_e64
+v_cmpx_gt_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_nge_f16 v255, v2
-// GFX12: v_cmpx_nge_f16_e64
+v_cmpx_gt_f16 v255, v2
+// GFX12: v_cmpx_gt_f16_e64 v255, v2              ; encoding: [0x7e,0x00,0x84,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ngt_f16 v255, v2
-// GFX12: v_cmpx_ngt_f16_e64
+v_cmpx_gt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x84,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_nle_f16 v255, v2
-// GFX12: v_cmpx_nle_f16_e64
+v_cmpx_gt_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_nlg_f16 v255, v2
-// GFX12: v_cmpx_nlg_f16_e64
+v_cmpx_gt_i16 v1, v255
+// GFX12: v_cmpx_gt_i16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb4,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_nlt_f16 v255, v2
-// GFX12: v_cmpx_nlt_f16_e64
+v_cmpx_gt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_o_f16 v255, v2
-// GFX12: v_cmpx_o_f16_e64
+v_cmpx_gt_i16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_u_f16 v255, v2
-// GFX12: v_cmpx_u_f16_e64
+v_cmpx_gt_i16 v255, v2
+// GFX12: v_cmpx_gt_i16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb4,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_class_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_class_f16_e64
+v_cmpx_gt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_eq_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_eq_f16_e64
+v_cmpx_gt_i16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_eq_i16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_eq_i16_e64
+v_cmpx_gt_u16 v1, v255
+// GFX12: v_cmpx_gt_u16_e64 v1, v255              ; encoding: [0x7e,0x00,0xbc,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_eq_u16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_eq_u16_e64
+v_cmpx_gt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_ge_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_ge_f16_e64
+v_cmpx_gt_u16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_ge_i16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_ge_i16_e64
+v_cmpx_gt_u16 v255, v2
+// GFX12: v_cmpx_gt_u16_e64 v255, v2              ; encoding: [0x7e,0x00,0xbc,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ge_u16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_ge_u16_e64
+v_cmpx_gt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_gt_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_gt_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_gt_f16_e64
+v_cmpx_gt_u16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_gt_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_gt_i16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_gt_i16_e64
+v_cmpx_le_f16 v1, v255
+// GFX12: v_cmpx_le_f16_e64 v1, v255              ; encoding: [0x7e,0x00,0x83,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_gt_u16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_gt_u16_e64
+v_cmpx_le_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x83,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
 v_cmpx_le_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_le_f16_e64
+// GFX12: v_cmpx_le_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_le_i16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_le_i16_e64
+v_cmpx_le_f16 v255, v2
+// GFX12: v_cmpx_le_f16_e64 v255, v2              ; encoding: [0x7e,0x00,0x83,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_le_u16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_le_u16_e64
+v_cmpx_le_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x83,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_lg_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_lg_f16_e64
+v_cmpx_le_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_lt_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_lt_f16_e64
+v_cmpx_le_i16 v1, v255
+// GFX12: v_cmpx_le_i16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb3,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_lt_i16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_lt_i16_e64
+v_cmpx_le_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lt_u16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_lt_u16_e64
+v_cmpx_le_i16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_ne_i16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_ne_i16_e64
+v_cmpx_le_i16 v255, v2
+// GFX12: v_cmpx_le_i16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb3,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ne_u16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_ne_u16_e64
+v_cmpx_le_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_neq_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_neq_f16_e64
+v_cmpx_le_i16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_nge_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_nge_f16_e64
+v_cmpx_le_u16 v1, v255
+// GFX12: v_cmpx_le_u16_e64 v1, v255              ; encoding: [0x7e,0x00,0xbb,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ngt_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_ngt_f16_e64
+v_cmpx_le_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_nle_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_nle_f16_e64
+v_cmpx_le_u16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_nlg_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_nlg_f16_e64
+v_cmpx_le_u16 v255, v2
+// GFX12: v_cmpx_le_u16_e64 v255, v2              ; encoding: [0x7e,0x00,0xbb,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_nlt_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_nlt_f16_e64
+v_cmpx_le_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_le_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_o_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_o_f16_e64
+v_cmpx_le_u16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_le_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_u_f16 v1, v255 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_u_f16_e64
+v_cmpx_lg_f16 v1, v255
+// GFX12: v_cmpx_lg_f16_e64 v1, v255              ; encoding: [0x7e,0x00,0x85,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_class_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_class_f16_e64
+v_cmpx_lg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x85,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_eq_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_eq_f16_e64
+v_cmpx_lg_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lg_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_eq_i16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_eq_i16_e64
+v_cmpx_lg_f16 v255, v2
+// GFX12: v_cmpx_lg_f16_e64 v255, v2              ; encoding: [0x7e,0x00,0x85,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_eq_u16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_eq_u16_e64
+v_cmpx_lg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lg_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x85,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_ge_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_ge_f16_e64
+v_cmpx_lg_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lg_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ge_i16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_ge_i16_e64
+v_cmpx_lt_f16 v1, v255
+// GFX12: v_cmpx_lt_f16_e64 v1, v255              ; encoding: [0x7e,0x00,0x81,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ge_u16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_ge_u16_e64
+v_cmpx_lt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_gt_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_gt_f16_e64
+v_cmpx_lt_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_gt_i16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_gt_i16_e64
+v_cmpx_lt_f16 v255, v2
+// GFX12: v_cmpx_lt_f16_e64 v255, v2              ; encoding: [0x7e,0x00,0x81,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_gt_u16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_gt_u16_e64
+v_cmpx_lt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_le_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_le_f16_e64
+v_cmpx_lt_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_le_i16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_le_i16_e64
+v_cmpx_lt_i16 v1, v255
+// GFX12: v_cmpx_lt_i16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb1,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_le_u16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_le_u16_e64
+v_cmpx_lt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lg_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_lg_f16_e64
+v_cmpx_lt_i16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_lt_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_lt_f16_e64
+v_cmpx_lt_i16 v255, v2
+// GFX12: v_cmpx_lt_i16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb1,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_lt_i16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_lt_i16_e64
+v_cmpx_lt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_lt_u16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_lt_u16_e64
+v_cmpx_lt_i16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ne_i16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_ne_i16_e64
+v_cmpx_lt_u16 v1, v255
+// GFX12: v_cmpx_lt_u16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb9,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ne_u16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_ne_u16_e64
+v_cmpx_lt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_neq_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_neq_f16_e64
+v_cmpx_lt_u16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_nge_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_nge_f16_e64
+v_cmpx_lt_u16 v255, v2
+// GFX12: v_cmpx_lt_u16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb9,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ngt_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_ngt_f16_e64
+v_cmpx_lt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_lt_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_nle_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_nle_f16_e64
+v_cmpx_lt_u16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_lt_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_nlg_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_nlg_f16_e64
+v_cmpx_ne_i16 v1, v255
+// GFX12: v_cmpx_ne_i16_e64 v1, v255              ; encoding: [0x7e,0x00,0xb5,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_nlt_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_nlt_f16_e64
+v_cmpx_ne_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_o_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_o_f16_e64
+v_cmpx_ne_i16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_i16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_u_f16 v255, v2 quad_perm:[3,2,1,0]
-// GFX12: v_cmpx_u_f16_e64
+v_cmpx_ne_i16 v255, v2
+// GFX12: v_cmpx_ne_i16_e64 v255, v2              ; encoding: [0x7e,0x00,0xb5,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_class_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_class_f16_e64
+v_cmpx_ne_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ne_i16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_eq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_eq_f16_e64
+v_cmpx_ne_i16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_i16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_eq_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_eq_i16_e64
+v_cmpx_ne_u16 v1, v255
+// GFX12: v_cmpx_ne_u16_e64 v1, v255              ; encoding: [0x7e,0x00,0xbd,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_eq_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_eq_u16_e64
+v_cmpx_ne_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_ge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_ge_f16_e64
+v_cmpx_ne_u16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_u16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_ge_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_ge_i16_e64
+v_cmpx_ne_u16 v255, v2
+// GFX12: v_cmpx_ne_u16_e64 v255, v2              ; encoding: [0x7e,0x00,0xbd,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ge_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_ge_u16_e64
+v_cmpx_ne_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ne_u16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_gt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_gt_f16_e64
+v_cmpx_ne_u16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ne_u16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_gt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_gt_i16_e64
+v_cmpx_neq_f16 v1, v255
+// GFX12: v_cmpx_neq_f16_e64 v1, v255             ; encoding: [0x7e,0x00,0x8d,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_gt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_gt_u16_e64
+v_cmpx_neq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_le_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_le_f16_e64
+v_cmpx_neq_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_neq_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_le_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_le_i16_e64
+v_cmpx_neq_f16 v255, v2
+// GFX12: v_cmpx_neq_f16_e64 v255, v2             ; encoding: [0x7e,0x00,0x8d,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_le_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_le_u16_e64
+v_cmpx_neq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_neq_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_lg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_lg_f16_e64
+v_cmpx_neq_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_neq_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_lt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_lt_f16_e64
+v_cmpx_nge_f16 v1, v255
+// GFX12: v_cmpx_nge_f16_e64 v1, v255             ; encoding: [0x7e,0x00,0x89,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_lt_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_lt_i16_e64
+v_cmpx_nge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x89,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lt_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_lt_u16_e64
+v_cmpx_nge_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nge_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_ne_i16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_ne_i16_e64
+v_cmpx_nge_f16 v255, v2
+// GFX12: v_cmpx_nge_f16_e64 v255, v2             ; encoding: [0x7e,0x00,0x89,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ne_u16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_ne_u16_e64
+v_cmpx_nge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nge_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x89,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_neq_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_neq_f16_e64
+v_cmpx_nge_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nge_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_nge_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_nge_f16_e64
+v_cmpx_ngt_f16 v1, v255
+// GFX12: v_cmpx_ngt_f16_e64 v1, v255             ; encoding: [0x7e,0x00,0x8b,0xd4,0x01,0xff,0x03,0x00]
 
 v_cmpx_ngt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_ngt_f16_e64
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8b,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_nle_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_nle_f16_e64
+v_cmpx_ngt_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ngt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_nlg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_nlg_f16_e64
+v_cmpx_ngt_f16 v255, v2
+// GFX12: v_cmpx_ngt_f16_e64 v255, v2             ; encoding: [0x7e,0x00,0x8b,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_nlt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_nlt_f16_e64
+v_cmpx_ngt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_ngt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8b,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_o_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_o_f16_e64
+v_cmpx_ngt_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_ngt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_u_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_u_f16_e64
+v_cmpx_nle_f16 v1, v255
+// GFX12: v_cmpx_nle_f16_e64 v1, v255             ; encoding: [0x7e,0x00,0x8c,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_class_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_class_f16_e64
+v_cmpx_nle_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8c,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_eq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_eq_f16_e64
+v_cmpx_nle_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nle_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_eq_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_eq_i16_e64
+v_cmpx_nle_f16 v255, v2
+// GFX12: v_cmpx_nle_f16_e64 v255, v2             ; encoding: [0x7e,0x00,0x8c,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_eq_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_eq_u16_e64
+v_cmpx_nle_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nle_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8c,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_ge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_ge_f16_e64
+v_cmpx_nle_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nle_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ge_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_ge_i16_e64
+v_cmpx_nlg_f16 v1, v255
+// GFX12: v_cmpx_nlg_f16_e64 v1, v255             ; encoding: [0x7e,0x00,0x8a,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ge_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_ge_u16_e64
+v_cmpx_nlg_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8a,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_gt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_gt_f16_e64
+v_cmpx_nlg_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlg_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_gt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_gt_i16_e64
+v_cmpx_nlg_f16 v255, v2
+// GFX12: v_cmpx_nlg_f16_e64 v255, v2             ; encoding: [0x7e,0x00,0x8a,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_gt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_gt_u16_e64
+v_cmpx_nlg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nlg_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8a,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_le_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_le_f16_e64
+v_cmpx_nlg_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlg_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_le_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_le_i16_e64
+v_cmpx_nlt_f16 v1, v255
+// GFX12: v_cmpx_nlt_f16_e64 v1, v255             ; encoding: [0x7e,0x00,0x8e,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_le_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_le_u16_e64
+v_cmpx_nlt_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8e,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_lg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_lg_f16_e64
+v_cmpx_nlt_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlt_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_lt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_lt_f16_e64
+v_cmpx_nlt_f16 v255, v2
+// GFX12: v_cmpx_nlt_f16_e64 v255, v2             ; encoding: [0x7e,0x00,0x8e,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_lt_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_lt_i16_e64
+v_cmpx_nlt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_nlt_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8e,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_lt_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_lt_u16_e64
+v_cmpx_nlt_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_nlt_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_ne_i16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_ne_i16_e64
+v_cmpx_o_f16 v1, v255
+// GFX12: v_cmpx_o_f16_e64 v1, v255               ; encoding: [0x7e,0x00,0x87,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_ne_u16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_ne_u16_e64
+v_cmpx_o_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x87,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_neq_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_neq_f16_e64
+v_cmpx_o_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_o_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
 
-v_cmpx_nge_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_nge_f16_e64
+v_cmpx_o_f16 v255, v2
+// GFX12: v_cmpx_o_f16_e64 v255, v2               ; encoding: [0x7e,0x00,0x87,0xd4,0xff,0x05,0x02,0x00]
 
-v_cmpx_ngt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_ngt_f16_e64
+v_cmpx_o_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_o_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x87,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
 
-v_cmpx_nle_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_nle_f16_e64
+v_cmpx_o_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_o_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
 
-v_cmpx_nlg_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_nlg_f16_e64
+v_cmpx_u_f16 v1, v255
+// GFX12: v_cmpx_u_f16_e64 v1, v255               ; encoding: [0x7e,0x00,0x88,0xd4,0x01,0xff,0x03,0x00]
 
-v_cmpx_nlt_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_nlt_f16_e64
+v_cmpx_u_f16 v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x88,0xd4,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05]
 
-v_cmpx_o_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_o_f16_e64
+v_cmpx_u_f16 v1, v255 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_u_f16_e64_dpp v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff]
+
+v_cmpx_u_f16 v255, v2
+// GFX12: v_cmpx_u_f16_e64 v255, v2               ; encoding: [0x7e,0x00,0x88,0xd4,0xff,0x05,0x02,0x00]
 
 v_cmpx_u_f16 v255, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_cmpx_u_f16_e64
+// GFX12: v_cmpx_u_f16_e64_dpp v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x88,0xd4,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05]
+
+v_cmpx_u_f16 v255, v2 quad_perm:[3,2,1,0]
+// GFX12: v_cmpx_u_f16_e64_dpp v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt
index 29e39c9e60ec874..b87c7bb9b268f96 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
 
 # W32: v_cmp_class_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0xfa,0x7c]
 # W64: v_cmp_class_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0xfa,0x7c]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt
index 460b222d0b7d9a3..40735cef0c5360b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp16.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W32
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W64
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W32
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefixes=W64
 
 # W32: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
 # W64: v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt
index 18f9db15c980f8d..00d5106cc90a5a5 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopc_dpp8.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
 
 # W32: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
 # W64: v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt
index 863d747e9c0ba3c..c6019b7fdfa75d6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
 
 # GFX11: v_cmpx_class_f16_e32 v1, v2             ; encoding: [0x01,0x05,0xfa,0x7d]
 0x01,0x05,0xfa,0x7d
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt
index e88d666fb3f935c..d3f92d0358188b6 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp16.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
 
 # GFX11: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff]
 0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt
index 6c51c9ba5a24f16..3c5b243e497f1ea 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vopcx_dpp8.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX11
 
 # GFX11: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05]
 0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt
index aa49caacb4fccdc..233c2e1b9d083b8 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt
@@ -157,6 +157,15 @@
 # GFX12: image_load v[0:2], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY th:TH_LOAD_HT scope:SCOPE_SE r128 a16 tfe d16 ; encoding: [0x75,0x00,0xc0,0xd3,0x00,0x10,0xa4,0x00,0x04,0x05,0x00,0x00]
 0x75,0x00,0xc0,0xd3,0x00,0x10,0xa4,0x00,0x04,0x05,0x00,0x00
 
+# GFX12: image_load v[4:7], [v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x00,0xc0,0xd3,0x04,0x08,0x00,0x00,0x01,0x00,0x00,0x00]
+0x01,0x00,0xc0,0xd3,0x04,0x08,0x00,0x00,0x01,0x00,0x00,0x00
+
+# GFX12: image_load v[1:4], [v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00]
+0x02,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00
+
+# GFX12: image_load v[1:4], [v3, v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x07,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00]
+0x07,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00
+
 # GFX12: image_load_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0xc0,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00]
 0x00,0x40,0xc0,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00
 
@@ -403,6 +412,15 @@
 # GFX12: image_store v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x00,0x80,0x41,0xd0,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00]
 0x00,0x80,0x41,0xd0,0x00,0x00,0x3c,0x00,0x00,0x00,0x00,0x00
 
+# GFX12: image_store v[1:4], [v2, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00]
+0x01,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX12: image_store v[1:4], [v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00]
+0x02,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00
+
+# GFX12: image_store v[1:4], [v3, v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x07,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00]
+0x07,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00
+
 # GFX12: image_store_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0xc1,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00]
 0x00,0xc0,0xc1,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00
 
@@ -559,6 +577,15 @@
 # GFX12: image_atomic_swap v[254:255], [v4, v5], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY a16 ; encoding: [0x47,0x80,0xc2,0xd0,0xfe,0xc0,0x00,0x00,0x04,0x05,0x00,0x00]
 0x47,0x80,0xc2,0xd0,0xfe,0xc0,0x00,0x00,0x04,0x05,0x00,0x00
 
+# GFX12: image_atomic_swap v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00]
+0x01,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX12: image_atomic_swap v1, [v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00]
+0x02,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00
+
+# GFX12: image_atomic_swap v1, [v3, v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x07,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00]
+0x07,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00
+
 # GFX12: image_atomic_cmpswap v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0xc2,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 0x00,0xc0,0xc2,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
 
@@ -613,6 +640,15 @@
 # GFX12: image_atomic_add_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT ; encoding: [0x00,0x00,0x43,0xd0,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00]
 0x00,0x00,0x43,0xd0,0x00,0x00,0x20,0x00,0x00,0x00,0x00,0x00
 
+# GFX12: image_atomic_add_uint v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00]
+0x01,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00
+
+# GFX12: image_atomic_add_uint v1, [v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00]
+0x02,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00
+
+# GFX12: image_atomic_add_uint v1, [v3, v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x07,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00]
+0x07,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00
+
 # GFX12: image_atomic_sub_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0x43,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00]
 0x00,0x40,0x43,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
 
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt
index 320e85238e36e47..8fff403b502ac10 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
 
 # W32: v_cmp_class_f16_e32 vcc_lo, v1, v2        ; encoding: [0x01,0x05,0xfa,0x7c]
 # W64: v_cmp_class_f16_e32 vcc, v1, v2           ; encoding: [0x01,0x05,0xfa,0x7c]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt
index be40d5fc8af9ea3..a840f0a9c2bec55 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp16.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
 
 # W32: v_cmp_class_f16 vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
 # W64: v_cmp_class_f16 vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7c,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt
index 3fb7bef6c18aeb0..0300ff215c35242 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopc_dpp8.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W32
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=W64
 
 # W32: v_cmp_class_f16 vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
 # W64: v_cmp_class_f16 vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7c,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt
index 5a3b1f183ebfa7b..74213ba162ae720 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
 
 # GFX12: v_cmpx_class_f16_e32 v1, v2             ; encoding: [0x01,0x05,0xfa,0x7d]
 0x01,0x05,0xfa,0x7d
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt
index 704a17f8b091fc6..fe9ef4f9e90d0ba 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp16.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
 
 # GFX12: v_cmpx_class_f16 v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff]
 0xfa,0x04,0xfa,0x7d,0x01,0x1b,0x00,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt
index 0b030b9e316ea46..53f15e8ae43147f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vopcx_dpp8.txt
@@ -1,5 +1,7 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck %s --check-prefix=GFX12
 
 # GFX12: v_cmpx_class_f16 v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05]
 0xe9,0x04,0xfa,0x7d,0x01,0x77,0x39,0x05
diff --git a/llvm/test/MC/Disassembler/X86/apx/msr-imm.txt b/llvm/test/MC/Disassembler/X86/apx/msr-imm.txt
new file mode 100644
index 000000000000000..63465bb7070ea8e
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/apx/msr-imm.txt
@@ -0,0 +1,18 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   rdmsr $123, %r9
+# INTEL: rdmsr r9, 123
+0x62,0xd7,0x7f,0x08,0xf6,0xc1,0x7b,0x00,0x00,0x00
+
+# ATT:   rdmsr $123, %r19
+# INTEL: rdmsr r19, 123
+0x62,0xff,0x7f,0x08,0xf6,0xc3,0x7b,0x00,0x00,0x00
+
+# ATT:   wrmsrns %r9, $123
+# INTEL: wrmsrns 123, r9
+0x62,0xd7,0x7e,0x08,0xf6,0xc1,0x7b,0x00,0x00,0x00
+
+# ATT:   wrmsrns %r19, $123
+# INTEL: wrmsrns 123, r19
+0x62,0xff,0x7e,0x08,0xf6,0xc3,0x7b,0x00,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/X86/msrimm-64.txt b/llvm/test/MC/Disassembler/X86/msrimm-64.txt
new file mode 100644
index 000000000000000..625d70d739cd349
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/msrimm-64.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   rdmsr $123, %r9
+# INTEL: rdmsr r9, 123
+0xc4,0xc7,0x7b,0xf6,0xc1,0x7b,0x00,0x00,0x00
+
+# ATT:   wrmsrns %r9, $123
+# INTEL: wrmsrns 123, r9
+0xc4,0xc7,0x7a,0xf6,0xc1,0x7b,0x00,0x00,0x00
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
new file mode 100644
index 000000000000000..c1cb271a967b13e
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        vsm4key4 %zmm4, %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmm4
+0x62,0xf2,0x66,0x48,0xda,0xd4
+
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%eax), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x66,0x48,0xda,0x10
+
+# ATT:        vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4key4  8128(%ecx), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x66,0x48,0xda,0x51,0x7f
+
+# ATT:        vsm4key4  -8192(%edx), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x66,0x48,0xda,0x52,0x80
+
+# ATT:        vsm4rnds4 %zmm4, %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmm4
+0x62,0xf2,0x67,0x48,0xda,0xd4
+
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%eax), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x67,0x48,0xda,0x10
+
+# ATT:        vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x67,0x48,0xda,0x51,0x7f
+
+# ATT:        vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x67,0x48,0xda,0x52,0x80
+
+# ATT:        vsm4key4 %ymm4, %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymm4
+0x62,0xf2,0x66,0x28,0xda,0xd4
+
+# ATT:        vsm4key4 %xmm4, %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmm4
+0x62,0xf2,0x66,0x08,0xda,0xd4
+
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%eax), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+0x62,0xf2,0x66,0x28,0xda,0x10
+
+# ATT:        vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4key4  4064(%ecx), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x66,0x28,0xda,0x51,0x7f
+
+# ATT:        vsm4key4  -4096(%edx), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+0x62,0xf2,0x66,0x28,0xda,0x52,0x80
+
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%eax), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+0x62,0xf2,0x66,0x08,0xda,0x10
+
+# ATT:        vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4key4  2032(%ecx), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x66,0x08,0xda,0x51,0x7f
+
+# ATT:        vsm4key4  -2048(%edx), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+0x62,0xf2,0x66,0x08,0xda,0x52,0x80
+
+# ATT:        vsm4rnds4 %ymm4, %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymm4
+0x62,0xf2,0x67,0x28,0xda,0xd4
+
+# ATT:        vsm4rnds4 %xmm4, %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmm4
+0x62,0xf2,0x67,0x08,0xda,0xd4
+
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%eax), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+0x62,0xf2,0x67,0x28,0xda,0x10
+
+# ATT:        vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4rnds4  4064(%ecx), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x67,0x28,0xda,0x51,0x7f
+
+# ATT:        vsm4rnds4  -4096(%edx), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+0x62,0xf2,0x67,0x28,0xda,0x52,0x80
+
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%eax), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+0x62,0xf2,0x67,0x08,0xda,0x10
+
+# ATT:        vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4rnds4  2032(%ecx), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x67,0x08,0xda,0x51,0x7f
+
+# ATT:        vsm4rnds4  -2048(%edx), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+0x62,0xf2,0x67,0x08,0xda,0x52,0x80
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
new file mode 100644
index 000000000000000..f89f4b5a8c0fb8f
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        vsm4key4 %zmm24, %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmm24
+0x62,0x82,0x46,0x40,0xda,0xf0
+
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%rip), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4key4  8128(%rcx), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x46,0x40,0xda,0x71,0x7f
+
+# ATT:        vsm4key4  -8192(%rdx), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x46,0x40,0xda,0x72,0x80
+
+# ATT:        vsm4rnds4 %zmm24, %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmm24
+0x62,0x82,0x47,0x40,0xda,0xf0
+
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%rip), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x47,0x40,0xda,0x71,0x7f
+
+# ATT:        vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x47,0x40,0xda,0x72,0x80
+
+# ATT:        vsm4key4 %ymm24, %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymm24
+0x62,0x82,0x46,0x20,0xda,0xf0
+
+# ATT:        vsm4key4 %xmm24, %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmm24
+0x62,0x82,0x46,0x00,0xda,0xf0
+
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%rip), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4key4  4064(%rcx), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x46,0x20,0xda,0x71,0x7f
+
+# ATT:        vsm4key4  -4096(%rdx), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x46,0x20,0xda,0x72,0x80
+
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%rip), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4key4  2032(%rcx), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x46,0x00,0xda,0x71,0x7f
+
+# ATT:        vsm4key4  -2048(%rdx), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x46,0x00,0xda,0x72,0x80
+
+# ATT:        vsm4rnds4 %ymm24, %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymm24
+0x62,0x82,0x47,0x20,0xda,0xf0
+
+# ATT:        vsm4rnds4 %xmm24, %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmm24
+0x62,0x82,0x47,0x00,0xda,0xf0
+
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%rip), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x47,0x20,0xda,0x71,0x7f
+
+# ATT:        vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x47,0x20,0xda,0x72,0x80
+
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%rip), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x47,0x00,0xda,0x71,0x7f
+
+# ATT:        vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x47,0x00,0xda,0x72,0x80
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index 2332bcacc99d06b..a744a660a7076f6 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -375,6 +375,9 @@
 .attribute arch, "rv32i_svbare1p0"
 # CHECK: attribute      5, "rv32i2p1_svbare1p0"
 
+.attribute arch, "rv32i_svvptc1p0"
+# CHECK: attribute      5, "rv32i2p1_svvptc1p0"
+
 .attribute arch, "rv32i_zfbfmin1p0"
 # CHECK: .attribute     5, "rv32i2p1_f2p2_zicsr2p0_zfbfmin1p0"
 
diff --git a/llvm/test/MC/RISCV/machine-csr-names.s b/llvm/test/MC/RISCV/machine-csr-names.s
index d509f9eadeb5e19..8cfdf7ee116cee6 100644
--- a/llvm/test/MC/RISCV/machine-csr-names.s
+++ b/llvm/test/MC/RISCV/machine-csr-names.s
@@ -1913,66 +1913,6 @@ csrrs t1, mhpmcounter31, zero
 csrrs t2, 0xB1F, zero
 
 
-######################################
-# Machine Counter Setup
-######################################
-# mnscratch
-# name
-# CHECK-INST: csrrs t1, mnscratch, zero
-# CHECK-ENC: encoding: [0x73,0x23,0x00,0x74]
-# CHECK-INST-ALIAS: csrr t1, mnscratch
-# uimm12
-# CHECK-INST: csrrs t2, mnscratch, zero
-# CHECK-ENC: encoding: [0xf3,0x23,0x00,0x74]
-# CHECK-INST-ALIAS: csrr t2, mnscratch
-# name
-csrrs t1, mnscratch, zero
-# uimm12
-csrrs t2, 0x740, zero
-
-# mnepc
-# name
-# CHECK-INST: csrrs t1, mnepc, zero
-# CHECK-ENC: encoding: [0x73,0x23,0x10,0x74]
-# CHECK-INST-ALIAS: csrr t1, mnepc
-# uimm12
-# CHECK-INST: csrrs t2, mnepc, zero
-# CHECK-ENC: encoding: [0xf3,0x23,0x10,0x74]
-# CHECK-INST-ALIAS: csrr t2, mnepc
-# name
-csrrs t1, mnepc, zero
-# uimm12
-csrrs t2, 0x741, zero
-
-# mncause
-# name
-# CHECK-INST: csrrs t1, mncause, zero
-# CHECK-ENC: encoding: [0x73,0x23,0x20,0x74]
-# CHECK-INST-ALIAS: csrr t1, mncause
-# uimm12
-# CHECK-INST: csrrs t2, mncause, zero
-# CHECK-ENC: encoding: [0xf3,0x23,0x20,0x74]
-# CHECK-INST-ALIAS: csrr t2, mncause
-# name
-csrrs t1, mncause, zero
-# uimm12
-csrrs t2, 0x742, zero
-
-# mnstatus
-# name
-# CHECK-INST: csrrs t1, mnstatus, zero
-# CHECK-ENC: encoding: [0x73,0x23,0x40,0x74]
-# CHECK-INST-ALIAS: csrr t1, mnstatus
-# uimm12
-# CHECK-INST: csrrs t2, mnstatus, zero
-# CHECK-ENC: encoding: [0xf3,0x23,0x40,0x74]
-# CHECK-INST-ALIAS: csrr t2, mnstatus
-# name
-csrrs t1, mnstatus, zero
-# uimm12
-csrrs t2, 0x744, zero
-
-
 ######################################
 # Machine Counter Setup
 ######################################
diff --git a/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll b/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll
index 320b65356ba9f37..b321c0c82ad4d31 100644
--- a/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll
+++ b/llvm/test/MC/WebAssembly/extern-functype-intrinsic.ll
@@ -1,5 +1,5 @@
-; RUN: llc %s -o - | FileCheck %s
-; RUN: llc %s -o - | llvm-mc -triple=wasm32-unknown-unknown | FileCheck %s
+; RUN: llc %s -mattr=-bulk-memory -o - | FileCheck %s
+; RUN: llc %s -mattr=-bulk-memory -o - | llvm-mc -triple=wasm32-unknown-unknown | FileCheck %s
 
 ; ModuleID = 'test.c'
 source_filename = "test.c"
diff --git a/llvm/test/MC/WebAssembly/libcall.ll b/llvm/test/MC/WebAssembly/libcall.ll
index 8b81f150da892aa..ffd32abe2345bc7 100644
--- a/llvm/test/MC/WebAssembly/libcall.ll
+++ b/llvm/test/MC/WebAssembly/libcall.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=obj %s -o - | obj2yaml | FileCheck %s
+; RUN: llc -filetype=obj -mattr=-bulk-memory %s -o - | obj2yaml | FileCheck %s
 
 target triple = "wasm32-unknown-unknown"
 
diff --git a/llvm/test/MC/X86/apx/msrimm-att.s b/llvm/test/MC/X86/apx/msrimm-att.s
new file mode 100644
index 000000000000000..e4259f19cb7be45
--- /dev/null
+++ b/llvm/test/MC/X86/apx/msrimm-att.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+# RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+# ERROR-COUNT-4: error:
+# ERROR-NOT: error:
+
+## rdmsr
+
+// CHECK: {evex} rdmsr $123, %r9
+// CHECK: encoding: [0x62,0xd7,0x7f,0x08,0xf6,0xc1,0x7b,0x00,0x00,0x00]
+          {evex} rdmsr $123, %r9
+
+// CHECK: rdmsr $123, %r19
+// CHECK: encoding: [0x62,0xff,0x7f,0x08,0xf6,0xc3,0x7b,0x00,0x00,0x00]
+          rdmsr $123, %r19
+
+## wrmsrns
+
+# CHECK: {evex}	wrmsrns %r9, $123
+# CHECK: encoding: [0x62,0xd7,0x7e,0x08,0xf6,0xc1,0x7b,0x00,0x00,0x00]
+         {evex}	wrmsrns %r9, $123
+
+# CHECK: wrmsrns %r19, $123
+# CHECK: encoding: [0x62,0xff,0x7e,0x08,0xf6,0xc3,0x7b,0x00,0x00,0x00]
+         wrmsrns %r19, $123
diff --git a/llvm/test/MC/X86/apx/msrimm-intel.s b/llvm/test/MC/X86/apx/msrimm-intel.s
new file mode 100644
index 000000000000000..d7eab047dd0cf77
--- /dev/null
+++ b/llvm/test/MC/X86/apx/msrimm-intel.s
@@ -0,0 +1,21 @@
+# RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+## rdmsr
+
+# CHECK: {evex}	rdmsr r9, 123
+# CHECK: encoding: [0x62,0xd7,0x7f,0x08,0xf6,0xc1,0x7b,0x00,0x00,0x00]
+         {evex}	rdmsr r9, 123
+
+# CHECK: rdmsr r19, 123
+# CHECK: encoding: [0x62,0xff,0x7f,0x08,0xf6,0xc3,0x7b,0x00,0x00,0x00]
+         rdmsr r19, 123
+
+## wrmsrns
+
+# CHECK: {evex}	wrmsrns 123, r9
+# CHECK: encoding: [0x62,0xd7,0x7e,0x08,0xf6,0xc1,0x7b,0x00,0x00,0x00]
+         {evex}	wrmsrns 123, r9
+
+# CHECK: wrmsrns 123, r19
+# CHECK: encoding: [0x62,0xff,0x7e,0x08,0xf6,0xc3,0x7b,0x00,0x00,0x00]
+         wrmsrns 123, r19
diff --git a/llvm/test/MC/X86/msrimm-64-att.s b/llvm/test/MC/X86/msrimm-64-att.s
new file mode 100644
index 000000000000000..e69eb7ff29a61e4
--- /dev/null
+++ b/llvm/test/MC/X86/msrimm-64-att.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+// RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR
+
+// ERROR-COUNT-2: error:
+// ERROR-NOT: error:
+
+// CHECK: rdmsr $123, %r9
+// CHECK: encoding: [0xc4,0xc7,0x7b,0xf6,0xc1,0x7b,0x00,0x00,0x00]
+          rdmsr $123, %r9
+
+// CHECK: wrmsrns %r9, $123
+// CHECK: encoding: [0xc4,0xc7,0x7a,0xf6,0xc1,0x7b,0x00,0x00,0x00]
+          wrmsrns %r9, $123
+
diff --git a/llvm/test/MC/X86/msrimm-64-intel.s b/llvm/test/MC/X86/msrimm-64-intel.s
new file mode 100644
index 000000000000000..e1ae9c67912365b
--- /dev/null
+++ b/llvm/test/MC/X86/msrimm-64-intel.s
@@ -0,0 +1,10 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: rdmsr r9, 123
+// CHECK: encoding: [0xc4,0xc7,0x7b,0xf6,0xc1,0x7b,0x00,0x00,0x00]
+          rdmsr r9, 123
+
+// CHECK: wrmsrns 123, r9
+// CHECK: encoding: [0xc4,0xc7,0x7a,0xf6,0xc1,0x7b,0x00,0x00,0x00]
+          wrmsrns 123, r9
+
diff --git a/llvm/test/MC/X86/sm4-evex-32-att.s b/llvm/test/MC/X86/sm4-evex-32-att.s
new file mode 100644
index 000000000000000..de10d95ac74d7ba
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-32-att.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK:      vsm4key4 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
+               vsm4key4 %zmm4, %zmm3, %zmm2
+
+// CHECK:      vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  (%eax), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
+               vsm4key4  (%eax), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  8128(%ecx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
+               vsm4key4  8128(%ecx), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  -8192(%edx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
+               vsm4key4  -8192(%edx), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
+               vsm4rnds4 %zmm4, %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  (%eax), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
+               vsm4rnds4  (%eax), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
+               vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
+               vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+
+// CHECK:      {evex} vsm4key4 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
+               {evex} vsm4key4 %ymm4, %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
+               {evex} vsm4key4 %xmm4, %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
+               {evex} vsm4key4  (%eax), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
+               {evex} vsm4key4  4064(%ecx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
+               {evex} vsm4key4  -4096(%edx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
+               {evex} vsm4key4  (%eax), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
+               {evex} vsm4key4  2032(%ecx), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
+               {evex} vsm4key4  -2048(%edx), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
+               {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
+               {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
+               {evex} vsm4rnds4  (%eax), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
+               {evex} vsm4rnds4  4064(%ecx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
+               {evex} vsm4rnds4  -4096(%edx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
+               {evex} vsm4rnds4  (%eax), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
+               {evex} vsm4rnds4  2032(%ecx), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
+               {evex} vsm4rnds4  -2048(%edx), %xmm3, %xmm2
\ No newline at end of file
diff --git a/llvm/test/MC/X86/sm4-evex-32-intel.s b/llvm/test/MC/X86/sm4-evex-32-intel.s
new file mode 100644
index 000000000000000..812fdb13f80913e
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-32-intel.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
+               vsm4key4 zmm2, zmm3, zmm4
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
+               vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
+               vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
+               vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
+               vsm4rnds4 zmm2, zmm3, zmm4
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
+               {evex} vsm4key4 ymm2, ymm3, ymm4
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
+               {evex} vsm4key4 xmm2, xmm3, xmm4
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
+               {evex} vsm4rnds4 ymm2, ymm3, ymm4
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
+               {evex} vsm4rnds4 xmm2, xmm3, xmm4
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
diff --git a/llvm/test/MC/X86/sm4-evex-64-att.s b/llvm/test/MC/X86/sm4-evex-64-att.s
new file mode 100644
index 000000000000000..389a29b11897954
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-64-att.s
@@ -0,0 +1,224 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK:      vsm4key4 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
+               vsm4key4 %zmm24, %zmm23, %zmm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  8128(%rcx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
+               vsm4key4  8128(%rcx), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  -8192(%rdx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
+               vsm4key4  -8192(%rdx), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
+               vsm4rnds4 %zmm24, %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4  (%rip), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
+               vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
+               vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+
+// CHECK:      vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+               vsm4key4 %ymm24, %ymm23, %ymm22
+
+// CHECK:      vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+               vsm4key4 %xmm24, %xmm23, %xmm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+               vsm4key4  4064(%rcx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+               vsm4key4  -4096(%rdx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+               vsm4key4  2032(%rcx), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+               vsm4key4  -2048(%rdx), %xmm23, %xmm22
+// CHECK:      vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+               vsm4key4 %ymm24, %ymm23, %ymm22
+
+// CHECK:      vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+               vsm4key4 %xmm24, %xmm23, %xmm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+               vsm4key4  4064(%rcx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+               vsm4key4  -4096(%rdx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+               vsm4key4  2032(%rcx), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+               vsm4key4  -2048(%rdx), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
+               vsm4rnds4 %ymm24, %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
+               vsm4rnds4 %xmm24, %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4  (%rip), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
+               vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
+               vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4  (%rip), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
+               vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
+               vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
diff --git a/llvm/test/MC/X86/sm4-evex-64-intel.s b/llvm/test/MC/X86/sm4-evex-64-intel.s
new file mode 100644
index 000000000000000..3cc18cf4178ed88
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-64-intel.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
+               vsm4key4 zmm22, zmm23, zmm24
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
+               vsm4rnds4 zmm22, zmm23, zmm24
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+               vsm4key4 ymm22, ymm23, ymm24
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+               vsm4key4 xmm22, xmm23, xmm24
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
+               vsm4rnds4 ymm22, ymm23, ymm24
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
+               vsm4rnds4 xmm22, xmm23, xmm24
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
diff --git a/llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir b/llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir
new file mode 100644
index 000000000000000..892a4298bbdb518
--- /dev/null
+++ b/llvm/test/MachineVerifier/AMDGPU/issue98474-missing-def-liveout-physical-subregister.mir
@@ -0,0 +1,36 @@
+# XFAIL: *
+# RUN: not --crash llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -run-pass=none -filetype=null %s
+
+# FIXME: This should fail the machine verifier. There is a missing def
+# of $vgpr2 in bb.1, which is needed since it's live into bb.3
+
+---
+name: missing_live_out_subreg_def
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    S_CMP_EQ_U32 $sgpr0, 0, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit killed $scc
+
+  bb.1:
+    liveins: $vgpr0
+
+    renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    liveins: $vgpr0
+
+    renamable $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
+    renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    renamable $vgpr1_vgpr2 = BUFFER_LOAD_FORMAT_XY_IDXEN killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), addrspace 8)
+
+  bb.3:
+    liveins: $vgpr0, $vgpr1_vgpr2
+
+    EXP 0, killed renamable $vgpr0, killed renamable $vgpr1, renamable $vgpr2, renamable $vgpr0, 0, 0, 0, implicit $exec
+    S_ENDPGM 0
+
+...
diff --git a/llvm/test/TableGen/64-bit-int.td b/llvm/test/TableGen/64-bit-int.td
index 2d2bdb8b560e2c3..d2a2999c14e9913 100644
--- a/llvm/test/TableGen/64-bit-int.td
+++ b/llvm/test/TableGen/64-bit-int.td
@@ -16,7 +16,7 @@ def {
 #ifdef OOR3
   bits<64> Val = 0x10000000000000000;
 #endif
-// CHECK-OOR: error: Number out of range
+// CHECK-OOR: error: number out of range
 
   bits<64> BinVal0 = 0x8000000000000000;
   bits<64> HexVal0 = 0b1000000000000000000000000000000000000000000000000000000000000000;
diff --git a/llvm/test/TableGen/invalid-macro-name-command-line.td b/llvm/test/TableGen/invalid-macro-name-command-line.td
index 0d2307997ebe545..7d19e8996639af8 100644
--- a/llvm/test/TableGen/invalid-macro-name-command-line.td
+++ b/llvm/test/TableGen/invalid-macro-name-command-line.td
@@ -3,7 +3,7 @@
 // RUN: not llvm-tblgen %s -D_MAC# 2>&1 | FileCheck %s --check-prefix=CHECK-TEST-3
 // RUN: not llvm-tblgen %s -D 2>&1 | FileCheck %s --check-prefix=CHECK-TEST-4
 
-// CHECK-TEST-1: error: Invalid macro name `MACRO=1` specified on command line
-// CHECK-TEST-2: error: Invalid macro name `0MAC` specified on command line
-// CHECK-TEST-3: error: Invalid macro name `_MAC#` specified on command line
+// CHECK-TEST-1: error: invalid macro name `MACRO=1` specified on command line
+// CHECK-TEST-2: error: invalid macro name `0MAC` specified on command line
+// CHECK-TEST-3: error: invalid macro name `_MAC#` specified on command line
 // CHECK-TEST-4: for the -D option: requires a value!
diff --git a/llvm/test/TableGen/lit.local.cfg b/llvm/test/TableGen/lit.local.cfg
index 0e827479cd41235..9d6dfdc14bbfb06 100644
--- a/llvm/test/TableGen/lit.local.cfg
+++ b/llvm/test/TableGen/lit.local.cfg
@@ -1,2 +1,10 @@
+import platform
+import lit.formats
+
 config.suffixes = [".td"]
 config.excludes = ["Common", "Inputs"]
+
+# The AIX 'diff' command doesn't support --strip-trailing-cr, but the internal
+# Python implementation does, so use that for cross-platform compatibility.
+if platform.system() == "AIX":
+    config.test_format = lit.formats.ShTest()
diff --git a/llvm/test/TableGen/prep-diag1.td b/llvm/test/TableGen/prep-diag1.td
index 41b7d477c6942e3..27f428f4fe95989 100644
--- a/llvm/test/TableGen/prep-diag1.td
+++ b/llvm/test/TableGen/prep-diag1.td
@@ -4,22 +4,22 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG3 %s
 
 #ifdef DIAG1
-// DIAG1: error: Only comments are supported after #define NAME
+// DIAG1: error: only comments are supported after #define NAME
 #define ENABLED1/*
 */class C;
 #endif // DIAG1
 
 #ifdef DIAG4
-// DIAG4: warning: Duplicate definition of macro: ENABLED1
+// DIAG4: warning: duplicate definition of macro: ENABLED1
 #define ENABLED1
 #define ENABLED1
 #endif // DIAG4
 
 #ifdef DIAG2
-// DIAG2: error: Only comments are supported after #ifdef NAME
+// DIAG2: error: only comments are supported after #ifdef NAME
 
 // Invalid #ifdef below should be detected even if DIAG2 is not defined.
-// DIAG3: error: Only comments are supported after #ifdef NAME
+// DIAG3: error: only comments are supported after #ifdef NAME
 #ifdef DIAG2/*
 */class C;
 #endif
diff --git a/llvm/test/TableGen/prep-diag10.td b/llvm/test/TableGen/prep-diag10.td
index eb387a07b066ca4..cfcbab094ad73b7 100644
--- a/llvm/test/TableGen/prep-diag10.td
+++ b/llvm/test/TableGen/prep-diag10.td
@@ -1,6 +1,6 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s
 
-// CHECK: error: Reached EOF without matching #endif
-// CHECK: error: The latest preprocessor control is here
+// CHECK: error: reached EOF without matching #endif
+// CHECK: error: the latest preprocessor control is here
 #ifdef DISABLED
 #else
diff --git a/llvm/test/TableGen/prep-diag11.td b/llvm/test/TableGen/prep-diag11.td
index 0042bc04f9e1014..1fe8a8503076e5f 100644
--- a/llvm/test/TableGen/prep-diag11.td
+++ b/llvm/test/TableGen/prep-diag11.td
@@ -1,7 +1,7 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s
 
-// CHECK: error: Reached EOF without matching #endif
-// CHECK: error: The latest preprocessor control is here
+// CHECK: error: reached EOF without matching #endif
+// CHECK: error: the latest preprocessor control is here
 #ifdef DISABLED
 #else
 #define ENABLED
diff --git a/llvm/test/TableGen/prep-diag12.td b/llvm/test/TableGen/prep-diag12.td
index c26301ee17ac2b1..02ffa672b2fa050 100644
--- a/llvm/test/TableGen/prep-diag12.td
+++ b/llvm/test/TableGen/prep-diag12.td
@@ -1,7 +1,7 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s
 
-// CHECK: error: Reached EOF without matching #endif
-// CHECK: error: The latest preprocessor control is here
+// CHECK: error: reached EOF without matching #endif
+// CHECK: error: the latest preprocessor control is here
 #ifdef DISABLED
 #else
 #define ENABLED
diff --git a/llvm/test/TableGen/prep-diag13.td b/llvm/test/TableGen/prep-diag13.td
index aa3fdab4802d379..733a46a16181318 100644
--- a/llvm/test/TableGen/prep-diag13.td
+++ b/llvm/test/TableGen/prep-diag13.td
@@ -1,7 +1,7 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s
 
-// CHECK: error: Reached EOF without matching #endif
-// CHECK: error: The latest preprocessor control is here
+// CHECK: error: reached EOF without matching #endif
+// CHECK: error: the latest preprocessor control is here
 #ifdef DISABLED
 /*
 #else
diff --git a/llvm/test/TableGen/prep-diag14.td b/llvm/test/TableGen/prep-diag14.td
index cae9bc3b7f5b6c8..a3216ee4f471251 100644
--- a/llvm/test/TableGen/prep-diag14.td
+++ b/llvm/test/TableGen/prep-diag14.td
@@ -1,6 +1,6 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s
 
-// CHECK: error: Reached EOF without matching #endif
-// CHECK: error: The latest preprocessor control is here
+// CHECK: error: reached EOF without matching #endif
+// CHECK: error: the latest preprocessor control is here
 #ifdef DISABLED
 // #endif
diff --git a/llvm/test/TableGen/prep-diag2.td b/llvm/test/TableGen/prep-diag2.td
index 741026b9c8a2d64..e51490600ff64f5 100644
--- a/llvm/test/TableGen/prep-diag2.td
+++ b/llvm/test/TableGen/prep-diag2.td
@@ -2,10 +2,10 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG2 %s
 
 #ifdef DIAG1
-// DIAG1: error: Only comments are supported after #else
+// DIAG1: error: only comments are supported after #else
 
 // Invalid #else below should be detected even if DIAG1 is not defined.
-// DIAG2: error: Only comments are supported after #else
+// DIAG2: error: only comments are supported after #else
 #ifdef DIAG2//DIAG2
 #else/*
 */class C;
diff --git a/llvm/test/TableGen/prep-diag3.td b/llvm/test/TableGen/prep-diag3.td
index fbedfa290b9947d..0b4d40307b40b06 100644
--- a/llvm/test/TableGen/prep-diag3.td
+++ b/llvm/test/TableGen/prep-diag3.td
@@ -2,10 +2,10 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG2 %s
 
 #ifdef DIAG1
-// DIAG1: error: Only comments are supported after #endif
+// DIAG1: error: only comments are supported after #endif
 
 // Invalid #else below should be detected even if DIAG1 is not defined.
-// DIAG2: error: Only comments are supported after #endif
+// DIAG2: error: only comments are supported after #endif
 #ifdef DIAG2//DIAG2
 #else/*!DIAG2*/
 #endif/* !DIAG2
diff --git a/llvm/test/TableGen/prep-diag4.td b/llvm/test/TableGen/prep-diag4.td
index 4661ef8667d23fc..ead116ebde0de8e 100644
--- a/llvm/test/TableGen/prep-diag4.td
+++ b/llvm/test/TableGen/prep-diag4.td
@@ -1,7 +1,7 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s
 
 // CHECK: error: double #else
-// CHECK: error: Previous #else is here
+// CHECK: error: previous #else is here
 #ifdef DIAG1
 #else
 #else
diff --git a/llvm/test/TableGen/prep-diag6.td b/llvm/test/TableGen/prep-diag6.td
index f4202d115da59a4..bf1cd3d3490b5e2 100644
--- a/llvm/test/TableGen/prep-diag6.td
+++ b/llvm/test/TableGen/prep-diag6.td
@@ -1,6 +1,6 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s
 
-// CHECK: error: Expected macro name after #ifdef
+// CHECK: error: expected macro name after #ifdef
 #ifdef
 #else
 #else
diff --git a/llvm/test/TableGen/prep-diag8.td b/llvm/test/TableGen/prep-diag8.td
index 7a7bde62c79c4e1..82797d6cf4a62d8 100644
--- a/llvm/test/TableGen/prep-diag8.td
+++ b/llvm/test/TableGen/prep-diag8.td
@@ -1,5 +1,5 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s
 
-// CHECK: error: Expected macro name after #define
+// CHECK: error: expected macro name after #define
 #define
 #endif
diff --git a/llvm/test/TableGen/prep-diag9.td b/llvm/test/TableGen/prep-diag9.td
index 4ecff575cdc7bbd..6ad208104301bc4 100644
--- a/llvm/test/TableGen/prep-diag9.td
+++ b/llvm/test/TableGen/prep-diag9.td
@@ -1,5 +1,5 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s
 
-// CHECK: error: Reached EOF without matching #endif
-// CHECK: error: The latest preprocessor control is here
+// CHECK: error: reached EOF without matching #endif
+// CHECK: error: the latest preprocessor control is here
 #ifdef DISABLED
diff --git a/llvm/test/TableGen/prep-ifndef-diag-1.td b/llvm/test/TableGen/prep-ifndef-diag-1.td
index 941f2d377a98a74..4a0d0754ed79063 100644
--- a/llvm/test/TableGen/prep-ifndef-diag-1.td
+++ b/llvm/test/TableGen/prep-ifndef-diag-1.td
@@ -1,4 +1,4 @@
 // RUN: not llvm-tblgen %s 2>&1 | FileCheck %s
 
-// CHECK: error: Expected macro name after #ifndef
+// CHECK: error: expected macro name after #ifndef
 #ifndef 1
diff --git a/llvm/test/TableGen/prep-ifndef-diag-2.td b/llvm/test/TableGen/prep-ifndef-diag-2.td
index 7b5f9dfd24b7861..c89cbab08e5c5cd 100644
--- a/llvm/test/TableGen/prep-ifndef-diag-2.td
+++ b/llvm/test/TableGen/prep-ifndef-diag-2.td
@@ -1,4 +1,4 @@
 // RUN: not llvm-tblgen %s 2>&1 | FileCheck %s
 
-// CHECK: error: Only comments are supported after #ifndef NAME
+// CHECK: error: only comments are supported after #ifndef NAME
 #ifndef MACRO 42
diff --git a/llvm/test/TableGen/unterminated-c-comment.td b/llvm/test/TableGen/unterminated-c-comment.td
index 0f4cd9d633c66d9..b5b995342be744a 100644
--- a/llvm/test/TableGen/unterminated-c-comment.td
+++ b/llvm/test/TableGen/unterminated-c-comment.td
@@ -1,5 +1,5 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s
 
-// CHECK: error: Unterminated comment!
+// CHECK: error: unterminated comment
 
 include "unterminated-c-comment-include.inc" */
diff --git a/llvm/test/TableGen/unterminated-code-block.td b/llvm/test/TableGen/unterminated-code-block.td
index d6b6f50827a6725..5bd4cd7e17d827c 100644
--- a/llvm/test/TableGen/unterminated-code-block.td
+++ b/llvm/test/TableGen/unterminated-code-block.td
@@ -1,5 +1,5 @@
 // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s
 
-// CHECK: error: Unterminated code block
+// CHECK: error: unterminated code block
 
 include "unterminated-code-block-include.inc" }]>;
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 85d9b02ac0cbf1c..21f3c8593a710b0 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1959,8 +1959,11 @@ static const X86FoldTableEntry Table1[] = {
   {X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE},
   {X86::VUCOMISSrr, X86::VUCOMISSrm, 0},
   {X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE},
+  {X86::VUCOMXSDZrr, X86::VUCOMXSDZrm, 0},
   {X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE},
+  {X86::VUCOMXSHZrr, X86::VUCOMXSHZrm, 0},
   {X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE},
+  {X86::VUCOMXSSZrr, X86::VUCOMXSSZrm, 0},
   {X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE},
   {X86::XOR16ri8_ND, X86::XOR16mi8_ND, 0},
   {X86::XOR16ri8_NF_ND, X86::XOR16mi8_NF_ND, 0},
@@ -4113,8 +4116,14 @@ static const X86FoldTableEntry Table2[] = {
   {X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0},
   {X86::VSHUFPSrri, X86::VSHUFPSrmi, 0},
   {X86::VSM4KEY4Yrr, X86::VSM4KEY4Yrm, 0},
+  {X86::VSM4KEY4Z128rr, X86::VSM4KEY4Z128rm, 0},
+  {X86::VSM4KEY4Z256rr, X86::VSM4KEY4Z256rm, 0},
+  {X86::VSM4KEY4Zrr, X86::VSM4KEY4Zrm, 0},
   {X86::VSM4KEY4rr, X86::VSM4KEY4rm, 0},
   {X86::VSM4RNDS4Yrr, X86::VSM4RNDS4Yrm, 0},
+  {X86::VSM4RNDS4Z128rr, X86::VSM4RNDS4Z128rm, 0},
+  {X86::VSM4RNDS4Z256rr, X86::VSM4RNDS4Z256rm, 0},
+  {X86::VSM4RNDS4Zrr, X86::VSM4RNDS4Zrm, 0},
   {X86::VSM4RNDS4rr, X86::VSM4RNDS4rm, 0},
   {X86::VSQRTNEPBF16Z128rkz, X86::VSQRTNEPBF16Z128mkz, 0},
   {X86::VSQRTNEPBF16Z256rkz, X86::VSQRTNEPBF16Z256mkz, 0},
diff --git a/llvm/test/ThinLTO/X86/memprof-icp.ll b/llvm/test/ThinLTO/X86/memprof-icp.ll
index f17e19e1f77ef25..99e071898765567 100644
--- a/llvm/test/ThinLTO/X86/memprof-icp.ll
+++ b/llvm/test/ThinLTO/X86/memprof-icp.ll
@@ -186,9 +186,13 @@
 ; REMARKS-MAIN: created clone _ZN2B03barEj.memprof.1
 ; REMARKS-MAIN: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold
 ; REMARKS-MAIN: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold
+; REMARKS-MAIN: call in clone _ZN2B03barEj marked with memprof allocation attribute notcold
+; REMARKS-MAIN: call in clone _ZN2B03barEj.memprof.1 marked with memprof allocation attribute cold
 ; REMARKS-MAIN: created clone _ZN1B3barEj.memprof.1
 ; REMARKS-MAIN: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold
 ; REMARKS-MAIN: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold
+; REMARKS-MAIN: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold
+; REMARKS-MAIN: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold
 ; REMARKS-FOO: created clone _Z3fooR2B0j.memprof.1
 ;; In each version of foo we should have promoted the indirect call to two conditional
 ;; direct calls, one to B::bar and one to B0::bar. The cloned version of foo should call
@@ -208,10 +212,10 @@
 ; REMARKS-FOO: call in clone _ZN1B3barEj marked with memprof allocation attribute notcold
 ; REMARKS-FOO: call in clone _ZN1B3barEj.memprof.1 marked with memprof allocation attribute cold
 
-; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis
-; STATS-BE: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
-; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis
-; STATS-BE: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
+; STATS: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during whole program analysis
+; STATS-BE: 8 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
+; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during whole program analysis
+; STATS-BE: 8 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
 ; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
 ; STATS-BE: 5 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
 
@@ -247,8 +251,8 @@
 ; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
 ; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold"
 
-; STATS-BE-DISTRIB: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
-; STATS-BE-DISTRIB: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
+; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
+; STATS-BE-DISTRIB: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
 ; STATS-BE-DISTRIB: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
 
 ;--- foo.ll
@@ -298,6 +302,9 @@ declare i32 @_Z3fooR2B0j(ptr, i32)
 define i32 @_ZN2B03barEj(ptr %this, i32 %s) {
 entry:
   %call = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !33, !callsite !38
+  ;; Second allocation in this function, to ensure that indirect edges to the
+  ;; same callee are partitioned correctly.
+  %call2 = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !45, !callsite !50
   store volatile i32 0, ptr %call, align 4
   ret i32 0
 }
@@ -311,6 +318,9 @@ declare void @_ZdlPvm()
 define i32 @_ZN1B3barEj(ptr %this, i32 %s) {
 entry:
   %call = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !39, !callsite !44
+  ;; Second allocation in this function, to ensure that indirect edges to the
+  ;; same callee are partitioned correctly.
+  %call2 = tail call ptr @_Znwm(i64 noundef 4) #0, !memprof !51, !callsite !56
   store volatile i32 0, ptr %call, align 4
   ret i32 0
 }
@@ -367,3 +377,15 @@ attributes #0 = { builtin allocsize(0) }
 !42 = !{!43, !"cold"}
 !43 = !{i64 4457553070050523782, i64 -2101080423462424381, i64 -6490791336773930154}
 !44 = !{i64 4457553070050523782}
+!45 = !{!46, !48}
+!46 = !{!47, !"notcold"}
+!47 = !{i64 456, i64 -2101080423462424381, i64 5188446645037944434}
+!48 = !{!49, !"cold"}
+!49 = !{i64 456, i64 -2101080423462424381, i64 5583420417449503557}
+!50 = !{i64 456}
+!51 = !{!52, !54}
+!52 = !{!53, !"notcold"}
+!53 = !{i64 789, i64 -2101080423462424381, i64 132626519179914298}
+!54 = !{!55, !"cold"}
+!55 = !{i64 789, i64 -2101080423462424381, i64 -6490791336773930154}
+!56 = !{i64 789}
diff --git a/llvm/test/Transforms/Attributor/nofpclass-phiselect.ll b/llvm/test/Transforms/Attributor/nofpclass-phiselect.ll
new file mode 100644
index 000000000000000..6635280bc436039
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/nofpclass-phiselect.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -S < %s | FileCheck %s
+
+define float @phi_select(i1 %c, float nofpclass(inf) %base, float nofpclass(inf) %arg) {
+; CHECK-LABEL: define float @phi_select
+; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SELECT]] = select i1 [[C]], float [[PHI]], float [[ARG]]
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret float [[SELECT]]
+;
+entry:
+  br label %loop
+
+loop:
+  %phi = phi float [ %base, %entry ], [ %select, %loop ]
+  %select = select i1 %c, float %phi, float %arg
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret float %select
+}
+
+define float @phi_select_onlybase(i1 %c, float nofpclass(inf) %base, float %arg) {
+; CHECK-LABEL: define float @phi_select_onlybase
+; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SELECT]] = select i1 [[C]], float [[PHI]], float [[ARG]]
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret float [[SELECT]]
+;
+entry:
+  br label %loop
+
+loop:
+  %phi = phi float [ %base, %entry ], [ %select, %loop ]
+  %select = select i1 %c, float %phi, float %arg
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret float %select
+}
+
+define float @phi_select_onlyarg(i1 %c, float %base, float nofpclass(inf) %arg) {
+; CHECK-LABEL: define float @phi_select_onlyarg
+; CHECK-SAME: (i1 [[C:%.*]], float [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[SELECT]] = select i1 [[C]], float [[PHI]], float [[ARG]]
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret float [[SELECT]]
+;
+entry:
+  br label %loop
+
+loop:
+  %phi = phi float [ %base, %entry ], [ %select, %loop ]
+  %select = select i1 %c, float %phi, float %arg
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret float %select
+}
+
+define float @phi_phi(i1 %c, float nofpclass(inf) %base, float nofpclass(inf) %arg) {
+; CHECK-LABEL: define float @phi_phi
+; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[EXIT1:%.*]] ]
+; CHECK-NEXT:    br label [[INNER:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    [[PHI2]] = phi float [ [[PHI]], [[LOOP]] ], [ [[ARG]], [[INNER]] ]
+; CHECK-NEXT:    br i1 [[C]], label [[INNER]], label [[EXIT1]]
+; CHECK:       exit1:
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret float [[PHI2]]
+;
+entry:
+  br label %loop
+
+loop:
+  %phi = phi float [ %base, %entry ], [ %phi2, %exit1 ]
+  br label %inner
+
+inner:
+  %phi2 = phi float [ %phi, %loop ], [ %arg, %inner ]
+  br i1 %c, label %inner, label %exit1
+
+exit1:
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret float %phi2
+}
+
+define float @phi_phi_onlybase(i1 %c, float nofpclass(inf) %base, float %arg) {
+; CHECK-LABEL: define float @phi_phi_onlybase
+; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[EXIT1:%.*]] ]
+; CHECK-NEXT:    br label [[INNER:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    [[PHI2]] = phi float [ [[PHI]], [[LOOP]] ], [ [[ARG]], [[INNER]] ]
+; CHECK-NEXT:    br i1 [[C]], label [[INNER]], label [[EXIT1]]
+; CHECK:       exit1:
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret float [[PHI2]]
+;
+entry:
+  br label %loop
+
+loop:
+  %phi = phi float [ %base, %entry ], [ %phi2, %exit1 ]
+  br label %inner
+
+inner:
+  %phi2 = phi float [ %phi, %loop ], [ %arg, %inner ]
+  br i1 %c, label %inner, label %exit1
+
+exit1:
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret float %phi2
+}
+
+define float @phi_phi_onlyarg(i1 %c, float %base, float nofpclass(inf) %arg) {
+; CHECK-LABEL: define float @phi_phi_onlyarg
+; CHECK-SAME: (i1 [[C:%.*]], float [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[EXIT1:%.*]] ]
+; CHECK-NEXT:    br label [[INNER:%.*]]
+; CHECK:       inner:
+; CHECK-NEXT:    [[PHI2]] = phi float [ [[PHI]], [[LOOP]] ], [ [[ARG]], [[INNER]] ]
+; CHECK-NEXT:    br i1 [[C]], label [[INNER]], label [[EXIT1]]
+; CHECK:       exit1:
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret float [[PHI2]]
+;
+entry:
+  br label %loop
+
+loop:
+  %phi = phi float [ %base, %entry ], [ %phi2, %exit1 ]
+  br label %inner
+
+inner:
+  %phi2 = phi float [ %phi, %loop ], [ %arg, %inner ]
+  br i1 %c, label %inner, label %exit1
+
+exit1:
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret float %phi2
+}
diff --git a/llvm/test/Transforms/EarlyCSE/defaultfp-strictfp.ll b/llvm/test/Transforms/EarlyCSE/defaultfp-strictfp.ll
index bdfad0d6e44e9c4..3871822c9dc17a5 100644
--- a/llvm/test/Transforms/EarlyCSE/defaultfp-strictfp.ll
+++ b/llvm/test/Transforms/EarlyCSE/defaultfp-strictfp.ll
@@ -246,8 +246,8 @@ define i1 @multiple_fcmp(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
-  %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -262,9 +262,9 @@ define i1 @multiple_fcmp_split(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
   call void @arbitraryfunc() #0
-  %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -278,8 +278,8 @@ define i1 @multiple_fcmps(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
-  %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -294,9 +294,9 @@ define i1 @multiple_fcmps_split(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
   call void @arbitraryfunc() #0
-  %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -318,5 +318,5 @@ declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
 declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
 declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
-declare i1 @llvm.experimental.constrained.fcmp.i1.f64(double, double, metadata, metadata)
-declare i1 @llvm.experimental.constrained.fcmps.i1.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
diff --git a/llvm/test/Transforms/EarlyCSE/ebstrict-strictfp.ll b/llvm/test/Transforms/EarlyCSE/ebstrict-strictfp.ll
index fafc7ccbb38c1f7..f2675ce7816a4e4 100644
--- a/llvm/test/Transforms/EarlyCSE/ebstrict-strictfp.ll
+++ b/llvm/test/Transforms/EarlyCSE/ebstrict-strictfp.ll
@@ -132,8 +132,8 @@ define i1 @fcmp_strict(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
-  %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -149,8 +149,8 @@ define i1 @fcmps_strict(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
-  %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -172,5 +172,5 @@ declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
 declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
 declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
-declare i1 @llvm.experimental.constrained.fcmp.i1.f64(double, double, metadata, metadata)
-declare i1 @llvm.experimental.constrained.fcmps.i1.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
diff --git a/llvm/test/Transforms/EarlyCSE/mixed-strictfp.ll b/llvm/test/Transforms/EarlyCSE/mixed-strictfp.ll
index f3b857ab2f4874c..b79f7018b8d0d55 100644
--- a/llvm/test/Transforms/EarlyCSE/mixed-strictfp.ll
+++ b/llvm/test/Transforms/EarlyCSE/mixed-strictfp.ll
@@ -339,8 +339,8 @@ define i1 @mixed_fcmp_maytrap(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
-  %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -356,8 +356,8 @@ define i1 @mixed_fcmp_strict(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
-  %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -373,8 +373,8 @@ define i1 @mixed_fcmps_maytrap(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
-  %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -390,8 +390,8 @@ define i1 @mixed_fcmps_strict(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @bar.i32(i32 [[TMP3]], i32 [[TMP4]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP2]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
-  %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.strict") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -413,5 +413,5 @@ declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
 declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
 declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
-declare i1 @llvm.experimental.constrained.fcmp.i1.f64(double, double, metadata, metadata)
-declare i1 @llvm.experimental.constrained.fcmps.i1.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
diff --git a/llvm/test/Transforms/EarlyCSE/nonmixed-strictfp.ll b/llvm/test/Transforms/EarlyCSE/nonmixed-strictfp.ll
index 8772f208ebe47ec..3acf5597dfc3fe6 100644
--- a/llvm/test/Transforms/EarlyCSE/nonmixed-strictfp.ll
+++ b/llvm/test/Transforms/EarlyCSE/nonmixed-strictfp.ll
@@ -313,8 +313,8 @@ define i1 @fcmp_defaultenv(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
-  %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -328,8 +328,8 @@ define i1 @fcmp_maytrap(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
-  %2 = call i1 @llvm.experimental.constrained.fcmp.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmp.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -343,8 +343,8 @@ define i1 @fcmps_defaultenv(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
-  %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.ignore") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -358,8 +358,8 @@ define i1 @fcmps_maytrap(double %a, double %b) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @bar.i32(i32 [[TMP2]], i32 [[TMP2]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret i1 [[TMP1]]
 ;
-  %1 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
-  %2 = call i1 @llvm.experimental.constrained.fcmps.i1.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  %1 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
+  %2 = call i1 @llvm.experimental.constrained.fcmps.f64(double %a, double %b, metadata !"oeq", metadata !"fpexcept.maytrap") #0
   %3 = zext i1 %1 to i32
   %4 = zext i1 %2 to i32
   %5 = call i32 @bar.i32(i32 %3, i32 %4) #0
@@ -381,5 +381,5 @@ declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
 declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
 declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
-declare i1 @llvm.experimental.constrained.fcmp.i1.f64(double, double, metadata, metadata)
-declare i1 @llvm.experimental.constrained.fcmps.i1.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata)
diff --git a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
index f29cf0d123939a9..7291d83b816115a 100644
--- a/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/compiler-crash-58759.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="default<O3>" < %s | FileCheck %s
+; RUN: opt -S --passes="default<O3>" -funcspec-for-literal-constant=false < %s | FileCheck %s
 
 define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr {
 entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
index 16a468511631293..0c24169d02c2c56 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization-constant-expression.ll
@@ -4,7 +4,7 @@
 ; Note that this test case shows that function specialization pass would
 ; transform the function even if no specialization happened.
 
-; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -funcspec-for-literal-constant=false -S < %s | FileCheck %s
 
 %struct = type { i8, i16, i32, i64, i64}
 @Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4}
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
index ef830a0e9a4a9e7..6f36a394979d81b 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization2.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE-ITER
-; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-for-literal-constant=false -force-specialization -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-for-literal-constant=false -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE-ITER
+; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-for-literal-constant=false -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
 
 
 define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
index 4e5a196d6682912..a6a990c34159363 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
@@ -1,8 +1,10 @@
 ; RUN: opt -passes="ipsccp<func-spec>" -force-specialization \
-; RUN:   -funcspec-max-clones=2 -S < %s | FileCheck %s
+; RUN:   -funcspec-for-literal-constant=false -funcspec-max-clones=2 \
+; RUN:   -S < %s | FileCheck %s
 
 ; RUN: opt -passes="ipsccp<func-spec>" -force-specialization \
-; RUN:   -funcspec-max-clones=1 -S < %s | FileCheck %s --check-prefix=CONST1
+; RUN:   -funcspec-for-literal-constant=false -funcspec-max-clones=1 \
+; RUN:   -S < %s | FileCheck %s --check-prefix=CONST1
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
diff --git a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
index dfa1e5a42776a5f..2f42125d8cf9799 100644
--- a/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/get-possible-constants.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-for-literal-constant=false < %s | FileCheck %s
 define dso_local i32 @p0(i32 noundef %x) {
 entry:
   %add = add nsw i32 %x, 1
diff --git a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
index 1926e29ddee0136..06185332f22e0cd 100644
--- a/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/global-rank.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 < %s | FileCheck %s
+; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-for-literal-constant=false -funcspec-max-clones=1 < %s | FileCheck %s
 define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
 entry:
   %call = tail call i32 %p(i32 noundef %x)
diff --git a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
index 930ed6627f7f1e6..97d77971a92d3b1 100644
--- a/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/identical-specializations.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
-; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -funcspec-for-literal-constant=false -S < %s | FileCheck %s
 
 define i64 @main(i64 %x, i64 %y, i1 %flag) {
 entry:
diff --git a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
index 3eae3dc261fb2ac..7d5e506064af778 100644
--- a/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/literal-const.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -S --passes="ipsccp<func-spec>" \
+; RUN:        -funcspec-for-literal-constant=false \
 ; RUN:        -force-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT
 ; RUN: opt -S --passes="ipsccp<func-spec>" \
-; RUN:        -funcspec-for-literal-constant \
 ; RUN:        -force-specialization < %s | FileCheck %s -check-prefix CHECK-LIT
 
 define i32 @f0(i32 noundef %x) {
diff --git a/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll
new file mode 100644
index 000000000000000..82d1f7ae4a6e160
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/maxgrowth.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=1       \
+; RUN:                                 -funcspec-for-literal-constant=true \
+; RUN:                                 -funcspec-min-codesize-savings=50   \
+; RUN:                                 -funcspec-min-latency-savings=50    \
+; RUN:                                 -funcspec-max-codesize-growth=1     \
+; RUN:                                 -S < %s | FileCheck %s
+
+; Verify that we are able to specialize a function successfully after analysis
+; of other specializations that are found to not be profitable.
+define void @test_specialize_after_failed_analysis(i32 %n) {
+entry:
+  %notspec0 = call i32 @add(i32 0, i32 %n)
+  %notspec1 = call i32 @add(i32 1, i32 %n)
+  %spec = call i32 @add(i32 1, i32 1)
+  ret void
+}
+
+define i32 @add(i32 %x, i32 %y) {
+entry:
+  %res = add i32 %x, %y
+  ret i32 %res
+}
+; CHECK-LABEL: define void @test_specialize_after_failed_analysis(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[NOTSPEC0:%.*]] = call i32 @add(i32 0, i32 [[N]])
+; CHECK-NEXT:    [[NOTSPEC1:%.*]] = call i32 @add(i32 1, i32 [[N]])
+; CHECK-NEXT:    [[SPEC:%.*]] = call i32 @add.specialized.1(i32 1, i32 1)
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define i32 @add(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[X]], [[Y]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+;
+; CHECK-LABEL: define internal i32 @add.specialized.1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret i32 poison
+;
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
index a653760abb2cc65..73291600edb85de 100644
--- a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO
-; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=false -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=false -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=false -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO
+; RUN: opt -passes="ipsccp<func-spec>" -funcspec-for-literal-constant=false -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE
 
 ; Make sure that we iterate correctly after sorting the specializations:
 ; FnSpecialization: Specializations for function compute
diff --git a/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll
new file mode 100644
index 000000000000000..f4ba0e72a1b4397
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/track-ptr-return.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 5
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization \
+; RUN:     -funcspec-max-iters=3 -S < %s | FileCheck %s
+
+; RUN: opt -passes="ipsccp<func-spec>" -force-specialization          \
+; RUN:     -funcspec-for-literal-constant=false -funcspec-max-iters=3 \
+; RUN:     -S < %s | FileCheck %s --check-prefix=NOLIT
+
+@global_true = constant i1 true
+@global_false = constant i1 false
+
+define i64 @main() {
+entry:
+  %op1 = call ptr @select_op(ptr @global_true)
+  %op2 = call ptr @select_op(ptr @global_false)
+
+  %c1 = call i64 @compute(ptr %op1)
+  %c2 = call i64 @compute(ptr %op2)
+  %add = add i64 %c1, %c2
+  ret i64 %add
+}
+
+define ptr @select_op(ptr %flag) {
+  %flag.val = load i1, ptr %flag
+  %op = select i1 %flag.val, ptr @plus, ptr @minus
+  ret ptr %op
+}
+
+define internal i64 @compute(ptr %op) {
+entry:
+  %res = call i64 %op(i64 1)
+  ret i64 %res
+}
+
+define internal i64 @plus(i64 %x) {
+entry:
+  %sum = add i64 %x, 1
+  ret i64 %sum
+}
+
+define internal i64 @minus(i64 %x) {
+entry:
+  %diff = sub i64 %x, 1
+  ret i64 %diff
+}
+; CHECK-LABEL: define i64 @main() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[OP1:%.*]] = call ptr @select_op.specialized.1(ptr @global_true)
+; CHECK-NEXT:    [[OP2:%.*]] = call ptr @select_op.specialized.2(ptr @global_false)
+; CHECK-NEXT:    [[C1:%.*]] = call i64 @compute.specialized.3(ptr @plus)
+; CHECK-NEXT:    [[C2:%.*]] = call i64 @compute.specialized.4(ptr @minus)
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[C1]], [[C2]]
+; CHECK-NEXT:    ret i64 [[ADD]]
+;
+;
+; CHECK-LABEL: define ptr @select_op(
+; CHECK-SAME: ptr [[FLAG:%.*]]) {
+; CHECK-NEXT:    [[FLAG_VAL:%.*]] = load i1, ptr [[FLAG]], align 1
+; CHECK-NEXT:    [[OP:%.*]] = select i1 [[FLAG_VAL]], ptr @plus, ptr @minus
+; CHECK-NEXT:    ret ptr [[OP]]
+;
+;
+; CHECK-LABEL: define internal i64 @plus(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SUM:%.*]] = add i64 [[X]], 1
+; CHECK-NEXT:    ret i64 [[SUM]]
+;
+;
+; CHECK-LABEL: define internal i64 @minus(
+; CHECK-SAME: i64 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[DIFF:%.*]] = sub i64 [[X]], 1
+; CHECK-NEXT:    ret i64 [[DIFF]]
+;
+;
+; CHECK-LABEL: define internal ptr @select_op.specialized.1(
+; CHECK-SAME: ptr [[FLAG:%.*]]) {
+; CHECK-NEXT:    ret ptr poison
+;
+;
+; CHECK-LABEL: define internal ptr @select_op.specialized.2(
+; CHECK-SAME: ptr [[FLAG:%.*]]) {
+; CHECK-NEXT:    ret ptr poison
+;
+;
+; CHECK-LABEL: define internal i64 @compute.specialized.3(
+; CHECK-SAME: ptr [[OP:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[RES:%.*]] = call i64 @plus(i64 1)
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+;
+; CHECK-LABEL: define internal i64 @compute.specialized.4(
+; CHECK-SAME: ptr [[OP:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[RES:%.*]] = call i64 @minus(i64 1)
+; CHECK-NEXT:    ret i64 [[RES]]
+;
+;
+; NOLIT-LABEL: define i64 @main() {
+; NOLIT-NEXT:  [[ENTRY:.*:]]
+; NOLIT-NEXT:    [[OP1:%.*]] = call ptr @select_op.specialized.1(ptr @global_true)
+; NOLIT-NEXT:    [[OP2:%.*]] = call ptr @select_op.specialized.2(ptr @global_false)
+; NOLIT-NEXT:    [[C1:%.*]] = call i64 @compute(ptr @plus)
+; NOLIT-NEXT:    [[C2:%.*]] = call i64 @compute(ptr @minus)
+; NOLIT-NEXT:    [[ADD:%.*]] = add i64 [[C1]], [[C2]]
+; NOLIT-NEXT:    ret i64 [[ADD]]
+;
+;
+; NOLIT-LABEL: define ptr @select_op(
+; NOLIT-SAME: ptr [[FLAG:%.*]]) {
+; NOLIT-NEXT:    [[FLAG_VAL:%.*]] = load i1, ptr [[FLAG]], align 1
+; NOLIT-NEXT:    [[OP:%.*]] = select i1 [[FLAG_VAL]], ptr @plus, ptr @minus
+; NOLIT-NEXT:    ret ptr [[OP]]
+;
+;
+; NOLIT-LABEL: define internal i64 @compute(
+; NOLIT-SAME: ptr [[OP:%.*]]) {
+; NOLIT-NEXT:  [[ENTRY:.*:]]
+; NOLIT-NEXT:    [[RES:%.*]] = call i64 [[OP]](i64 1)
+; NOLIT-NEXT:    ret i64 [[RES]]
+;
+;
+; NOLIT-LABEL: define internal i64 @plus(
+; NOLIT-SAME: i64 [[X:%.*]]) {
+; NOLIT-NEXT:  [[ENTRY:.*:]]
+; NOLIT-NEXT:    [[SUM:%.*]] = add i64 [[X]], 1
+; NOLIT-NEXT:    ret i64 [[SUM]]
+;
+;
+; NOLIT-LABEL: define internal i64 @minus(
+; NOLIT-SAME: i64 [[X:%.*]]) {
+; NOLIT-NEXT:  [[ENTRY:.*:]]
+; NOLIT-NEXT:    [[DIFF:%.*]] = sub i64 [[X]], 1
+; NOLIT-NEXT:    ret i64 [[DIFF]]
+;
+;
+; NOLIT-LABEL: define internal ptr @select_op.specialized.1(
+; NOLIT-SAME: ptr [[FLAG:%.*]]) {
+; NOLIT-NEXT:    ret ptr poison
+;
+;
+; NOLIT-LABEL: define internal ptr @select_op.specialized.2(
+; NOLIT-SAME: ptr [[FLAG:%.*]]) {
+; NOLIT-NEXT:    ret ptr poison
+;
diff --git a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
index 3e9b2d94efda89d..d8266f4c6703dd6 100644
--- a/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
+++ b/llvm/test/Transforms/InferFunctionAttrs/annotate.ll
@@ -667,6 +667,15 @@ declare float @logf(float)
 ; CHECK: declare x86_fp80 @logl(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
 declare x86_fp80 @logl(x86_fp80)
 
+; CHECK: declare double @tgamma(double) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
+declare double @tgamma(double)
+
+; CHECK: declare float @tgammaf(float) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
+declare float @tgammaf(float)
+
+; CHECK: declare x86_fp80 @tgammal(x86_fp80) [[NOFREE_NOUNWIND_WILLRETURN_WRITEONLY]]
+declare x86_fp80 @tgammal(x86_fp80)
+
 ; CHECK: declare noundef i32 @lstat(ptr nocapture noundef readonly, ptr nocapture noundef) [[NOFREE_NOUNWIND]]
 declare i32 @lstat(ptr, ptr)
 
diff --git a/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll b/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll
index 8b472aa5af09024..b5ae08e1daa3afa 100644
--- a/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll
+++ b/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll
@@ -1,13 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -S -passes='instcombine<no-verify-fixpoint>' | FileCheck %s
+; RUN: opt < %s -S -passes=instcombine | FileCheck %s
 
 ; We do not reach a fixpoint, because we first have to infer nsw on the IV add,
 ; and could eliminate the icmp slt afterwards, but don't revisit it.
 
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 
-define i32 @test() {
-; CHECK-LABEL: define i32 @test() {
+define i32 @test() "instcombine-no-verify-fixpoint" {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 true, label [[BB_I:%.*]], label [[CALCULATECOLORSPECIFICBLACKLEVEL_EXIT:%.*]]
 ; CHECK:       bb.i:
diff --git a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll
index 5a9767a64a2ced0..9f8d3e317accc73 100644
--- a/llvm/test/Transforms/InstCombine/and-compare.ll
+++ b/llvm/test/Transforms/InstCombine/and-compare.ll
@@ -172,3 +172,87 @@ define i1 @test_ne_cp2_other_okay2(i8 %x, i8 %yy) {
   %r = icmp ne i8 %and_x_y, %and_x_neg_y
   ret i1 %r
 }
+
+define i1 @test_eq_0_and_15_add_1(i8 %a) {
+; CHECK-LABEL: @test_eq_0_and_15_add_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[A:%.*]], 15
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[TMP0]], 15
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %add = add i8 %a, 1
+  %and = and i8 %add, 15
+  %cmp = icmp eq i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @test_ne_0_and_15_add_1(i8 %a) {
+; CHECK-LABEL: @test_ne_0_and_15_add_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[A:%.*]], 15
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[TMP0]], 15
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %add = add i8 %a, 1
+  %and = and i8 %add, 15
+  %cmp = icmp ne i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @test_eq_0_and_15_add_3(i8 %a) {
+; CHECK-LABEL: @test_eq_0_and_15_add_3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[A:%.*]], 15
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[TMP0]], 13
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %add = add i8 %a, 3
+  %and = and i8 %add, 15
+  %cmp = icmp eq i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @test_ne_0_and_15_add_3(i8 %a) {
+; CHECK-LABEL: @test_ne_0_and_15_add_3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[A:%.*]], 15
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[TMP0]], 13
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %add = add i8 %a, 3
+  %and = and i8 %add, 15
+  %cmp = icmp ne i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @test_eq_11_and_15_add_10(i8 %a) {
+; CHECK-LABEL: @test_eq_11_and_15_add_10(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[A:%.*]], 15
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[TMP0]], 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %add = add i8 %a, 10
+  %and = and i8 %add, 15
+  %cmp = icmp eq i8 %and, 11
+  ret i1 %cmp
+}
+
+define i1 @test_ne_11_and_15_add_10(i8 %a) {
+; CHECK-LABEL: @test_ne_11_and_15_add_10(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[A:%.*]], 15
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[TMP0]], 1
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %add = add i8 %a, 10
+  %and = and i8 %add, 15
+  %cmp = icmp ne i8 %and, 11
+  ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/InstCombine/cast_phi.ll b/llvm/test/Transforms/InstCombine/cast_phi.ll
index 2819b7d05f7b309..6b05edc31deb87a 100644
--- a/llvm/test/Transforms/InstCombine/cast_phi.ll
+++ b/llvm/test/Transforms/InstCombine/cast_phi.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes="instcombine<no-verify-fixpoint>" -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 target datalayout = "n32:64"
 
@@ -309,7 +309,7 @@ exit:
   ret i64 %r
 }
 
-define i8 @trunc_in_loop_exit_block() {
+define i8 @trunc_in_loop_exit_block() "instcombine-no-verify-fixpoint" {
 ; CHECK-LABEL: @trunc_in_loop_exit_block(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll b/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll
index 7f800f614c47d7c..ed4fcc6ecaac723 100644
--- a/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll
+++ b/llvm/test/Transforms/InstCombine/constant-fold-iteration.ll
@@ -1,11 +1,12 @@
-; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S -debug 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S -debug 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
 
 ; This test disables fixpoint verification, because that would cause a second
 ; iteration for verification.
 
-define i32 @a() nounwind readnone {
+define i32 @a() nounwind readnone "instcombine-no-verify-fixpoint" {
 entry:
   %cmp = icmp eq i32 0, ptrtoint (ptr @a to i32)
   %ext = zext i1 %cmp to i32
diff --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll
index e8a25ff44d02966..a91c9bfc91c40d2 100644
--- a/llvm/test/Transforms/InstCombine/div.ll
+++ b/llvm/test/Transforms/InstCombine/div.ll
@@ -429,9 +429,8 @@ define <2 x i32> @test31(<2 x i32> %x) {
 
 define i32 @test32(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test32(
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 2, [[B:%.*]]
-; CHECK-NEXT:    [[DIV:%.*]] = lshr i32 [[SHL]], 2
-; CHECK-NEXT:    [[DIV2:%.*]] = udiv i32 [[A:%.*]], [[DIV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[B:%.*]], -1
+; CHECK-NEXT:    [[DIV2:%.*]] = lshr i32 [[A:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[DIV2]]
 ;
   %shl = shl i32 2, %b
@@ -1832,3 +1831,41 @@ define i32 @fold_disjoint_or_over_udiv(i32 %x) {
   %r = udiv i32 %or, 9
   ret i32 %r
 }
+
+define i8 @udiv_trunc_shl(i32 %x) {
+; CHECK-LABEL: @udiv_trunc_shl(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT:    [[UDIV1:%.*]] = lshr i8 8, [[TMP1]]
+; CHECK-NEXT:    ret i8 [[UDIV1]]
+;
+  %lshr = shl i32 1, %x
+  %trunc = trunc i32 %lshr to i8
+  %div = udiv i8 8, %trunc
+  ret i8 %div
+}
+
+define i32 @zext_udiv_trunc_lshr(i32 %x) {
+; CHECK-LABEL: @zext_udiv_trunc_lshr(
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i8
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 5, [[TMP1]]
+; CHECK-NEXT:    [[UDIV1:%.*]] = lshr i8 8, [[TMP2]]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext nneg i8 [[UDIV1]] to i32
+; CHECK-NEXT:    ret i32 [[ZEXT]]
+;
+  %lshr = lshr i32 32, %x
+  %trunc = trunc i32 %lshr to i8
+  %div = udiv i8 8, %trunc
+  %zext = zext i8 %div to i32
+  ret i32 %zext
+}
+
+define i32 @udiv_and_shl(i32 %a, i32 %b, i32 %c) {
+; CHECK-LABEL: @udiv_and_shl(
+; CHECK-NEXT:    [[DIV1:%.*]] = lshr i32 [[C:%.*]], [[A:%.*]]
+; CHECK-NEXT:    ret i32 [[DIV1]]
+;
+  %shl = shl i32 1, %a
+  %and = and i32 %b, %shl
+  %div = udiv i32 %c, %and
+  ret i32 %div
+}
diff --git a/llvm/test/Transforms/InstCombine/fneg.ll b/llvm/test/Transforms/InstCombine/fneg.ll
index 3c4088832feaaa6..6a9b3309bb347ec 100644
--- a/llvm/test/Transforms/InstCombine/fneg.ll
+++ b/llvm/test/Transforms/InstCombine/fneg.ll
@@ -1109,4 +1109,36 @@ define float @test_fneg_select_maxnum(float %x) {
   ret float %neg
 }
 
+; Check that there's no infinite loop.
+define <vscale x 2 x double> @test_fneg_select_svec(<vscale x 2 x i1> %cond, <vscale x 2 x double> %b) {
+; CHECK-LABEL: @test_fneg_select_svec(
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg fast <vscale x 2 x double> [[TMP1:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = select fast <vscale x 2 x i1> [[COND:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x double> [[TMP2]]
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP3]]
+;
+  %1 = select <vscale x 2 x i1> %cond, <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> %b
+  %2 = fneg fast <vscale x 2 x double> %1
+  ret <vscale x 2 x double> %2
+}
+
+define <vscale x 2 x double> @test_fneg_select_svec_2(<vscale x 2 x i1> %cond, <vscale x 2 x double> %a) {
+; CHECK-LABEL: @test_fneg_select_svec_2(
+; CHECK-NEXT:    [[A_NEG:%.*]] = fneg fast <vscale x 2 x double> [[A:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = select fast <vscale x 2 x i1> [[COND:%.*]], <vscale x 2 x double> [[A_NEG]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %1 = select <vscale x 2 x i1> %cond, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %2 = fneg fast <vscale x 2 x double> %1
+  ret <vscale x 2 x double> %2
+}
+
+define <vscale x 2 x double> @test_fneg_select_svec_3(<vscale x 2 x i1> %cond, <vscale x 2 x double> %b) {
+; CHECK-LABEL: @test_fneg_select_svec_3(
+; CHECK-NEXT:    ret <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double -0.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+;
+  %1 = select <vscale x 2 x i1> %cond, <vscale x 2 x double> zeroinitializer, <vscale x 2 x double> zeroinitializer
+  %2 = fneg fast <vscale x 2 x double> %1
+  ret <vscale x 2 x double> %2
+}
+
 !0 = !{}
diff --git a/llvm/test/Transforms/InstCombine/icmp-or.ll b/llvm/test/Transforms/InstCombine/icmp-or.ll
index 36b3216196f8467..56115f6d7d34144 100644
--- a/llvm/test/Transforms/InstCombine/icmp-or.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-or.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 declare void @use(i8)
 
@@ -432,7 +432,7 @@ define i1 @icmp_or_xor_2_ne_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) {
 ; simplify `%cmp_1 = icmp eq i64 %xor, 0`, `%xor = xor i64 %x1, %y1`
 ; has one use which allows for complete simplification (rooted on
 ; `%or1 = or i1 %cmp, %cmp_1` so we don't end up adding it back).
-define i1 @icmp_or_xor_2_3_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) {
+define i1 @icmp_or_xor_2_3_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) "instcombine-no-verify-fixpoint" {
 ; CHECK-LABEL: @icmp_or_xor_2_3_fail(
 ; CHECK-NEXT:    [[XOR:%.*]] = xor i64 [[X1:%.*]], [[Y1:%.*]]
 ; CHECK-NEXT:    [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]]
@@ -453,7 +453,7 @@ define i1 @icmp_or_xor_2_3_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) {
 
 ; negative test - xor multiuse
 
-define i1 @icmp_or_xor_2_4_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) {
+define i1 @icmp_or_xor_2_4_fail(i64 %x1, i64 %y1, i64 %x2, i64 %y2) "instcombine-no-verify-fixpoint" {
 ; CHECK-LABEL: @icmp_or_xor_2_4_fail(
 ; CHECK-NEXT:    [[XOR:%.*]] = xor i64 [[X1:%.*]], [[Y1:%.*]]
 ; CHECK-NEXT:    [[XOR1:%.*]] = xor i64 [[X2:%.*]], [[Y2:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/pr105510.ll b/llvm/test/Transforms/InstCombine/pr105510.ll
new file mode 100644
index 000000000000000..844fa14ad991ee9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr105510.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+; Make sure we don't crash in this case.
+@g = global i32 0
+
+define i1 @foo() {
+; CHECK-LABEL: define i1 @foo() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 ptrtoint (ptr @g to i1), label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    ret i1 true
+; CHECK:       [[IF_ELSE]]:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  br i1 ptrtoint (ptr @g to i1), label %if.then, label %if.else
+
+if.then:
+  ret i1 true
+
+if.else:
+  ret i1 false
+}
+
+define i1 @bar() {
+; CHECK-LABEL: define i1 @bar() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 ptrtoint (ptr @g to i1), label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    ret i1 true
+; CHECK:       [[IF_ELSE]]:
+; CHECK-NEXT:    ret i1 false
+;
+entry:
+  br i1 ptrtoint (ptr @g to i1), label %if.then, label %if.else
+
+if.then:
+  ret i1 true
+
+if.else:
+  ret i1 false
+}
diff --git a/llvm/test/Transforms/InstCombine/pr55228.ll b/llvm/test/Transforms/InstCombine/pr55228.ll
index 5e34c074346e3c6..c959bf16bcb5d52 100644
--- a/llvm/test/Transforms/InstCombine/pr55228.ll
+++ b/llvm/test/Transforms/InstCombine/pr55228.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes='instcombine<no-verify-fixpoint>' < %s | FileCheck %s
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
 
 ; This does not reach a fixpoint, because the global initializer is not in
 ; folded form. This will not happen if preceded by a GlobalOpt run.
@@ -9,7 +9,7 @@ target datalayout = "p:8:8"
 @g = external global i8
 @c = constant ptr getelementptr inbounds (i8, ptr @g, i64 1)
 
-define i1 @test(ptr %p) {
+define i1 @test(ptr %p) "instcombine-no-verify-fixpoint" {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[P:%.*]], getelementptr inbounds (i8, ptr @g, i64 1)
 ; CHECK-NEXT:    ret i1 [[CMP]]
diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
index 558f4ffbfcabe46..986e1073c638913 100644
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -1,8 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S | FileCheck %s
-
-; The fuzzer-generated @ashr_out_of_range test case does not reach a fixpoint,
-; because a logical and it not relaxed to a bitwise and in one iteration.
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 declare void @use(i64)
 declare void @use_i32(i32)
@@ -677,8 +674,8 @@ entry:
 
 define i32 @test42(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test42(
-; CHECK-NEXT:    [[DIV:%.*]] = lshr exact i32 4096, [[B:%.*]]
-; CHECK-NEXT:    [[DIV2:%.*]] = udiv i32 [[A:%.*]], [[DIV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 12, [[B:%.*]]
+; CHECK-NEXT:    [[DIV2:%.*]] = lshr i32 [[A:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[DIV2]]
 ;
   %div = lshr i32 4096, %b    ; must be exact otherwise we'd divide by zero
@@ -688,8 +685,8 @@ define i32 @test42(i32 %a, i32 %b) {
 
 define <2 x i32> @test42vec(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @test42vec(
-; CHECK-NEXT:    [[DIV:%.*]] = lshr exact <2 x i32> <i32 4096, i32 4096>, [[B:%.*]]
-; CHECK-NEXT:    [[DIV2:%.*]] = udiv <2 x i32> [[A:%.*]], [[DIV]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i32> <i32 12, i32 12>, [[B:%.*]]
+; CHECK-NEXT:    [[DIV2:%.*]] = lshr <2 x i32> [[A:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[DIV2]]
 ;
   %div = lshr <2 x i32> <i32 4096, i32 4096>, %b    ; must be exact otherwise we'd divide by zero
@@ -1735,9 +1732,12 @@ define i177 @lshr_out_of_range2(i177 %Y, ptr %A2, ptr %ptr) {
   ret i177 %B1
 }
 
+; The fuzzer-generated @ashr_out_of_range test case does not reach a fixpoint,
+; because a logical and is not relaxed to a bitwise and in one iteration.
+
 ; OSS Fuzz #5032
 ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=5032
-define void @ashr_out_of_range(ptr %A) {
+define void @ashr_out_of_range(ptr %A) "instcombine-no-verify-fixpoint" {
 ; CHECK-LABEL: @ashr_out_of_range(
 ; CHECK-NEXT:    [[L:%.*]] = load i177, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i177 [[L]], -1
diff --git a/llvm/test/Transforms/InstCombine/sink_instruction.ll b/llvm/test/Transforms/InstCombine/sink_instruction.ll
index dac40852c4bdcbf..cb9a3069ca5fd4b 100644
--- a/llvm/test/Transforms/InstCombine/sink_instruction.ll
+++ b/llvm/test/Transforms/InstCombine/sink_instruction.ll
@@ -1,9 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -passes='instcombine<no-verify-fixpoint>' -S < %s | FileCheck %s
-
-; We fail to reach a fixpoint, because sunk instructions get revisited too
-; early. In @test2 the sunk add is revisited before the dominating condition
-; is visited and added to the DomConditionCache.
+; RUN: opt -passes=instcombine -S < %s | FileCheck %s
 
 ;; This tests that the instructions in the entry blocks are sunk into each
 ;; arm of the 'if'.
@@ -31,9 +27,12 @@ endif:          ; preds = %entry
   ret i32 %tmp.2
 }
 
+; We fail to reach a fixpoint, because sunk instructions get revisited too
+; early. In @test2 the sunk add is revisited before the dominating condition
+; is visited and added to the DomConditionCache.
 
 ;; PHI use, sink divide before call.
-define i32 @test2(i32 %x) nounwind ssp {
+define i32 @test2(i32 %x) nounwind ssp "instcombine-no-verify-fixpoint" {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[BB:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 75a84e51279b805..39a9db02eef2930 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -2377,3 +2377,71 @@ define <2 x i32> @not_splat_shuffle2(i32 %x) {
   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <2 x i32> <i32 1, i32 3>
   ret <2 x i32> %shuf
 }
+define <2 x i32> @foldselect0(i1 %c) {
+; CHECK-LABEL: @foldselect0(
+; CHECK-NEXT:    [[SHUF:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 7, i32 42>, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    ret <2 x i32> [[SHUF]]
+;
+  %sel = select i1 %c, <2 x i32> <i32 42, i32 7>, <2 x i32> <i32 0, i32 1>
+  %shuf = shufflevector <2 x i32> %sel, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+  ret <2 x i32> %shuf
+}
+
+; Make sure we do not crash in this case.
+define <4 x float> @shuf_larger_length_vec_select(<2 x i1> %cond) {
+; CHECK-LABEL: @shuf_larger_length_vec_select(
+; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x float> zeroinitializer, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x float> [[SEL]], <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:    ret <4 x float> [[SHUF]]
+;
+  %sel = select <2 x i1> %cond, <2 x float> zeroinitializer, <2 x float> splat(float 1.000000e+00)
+  %shuf = shufflevector <2 x float> %sel, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x float> %shuf
+}
+
+; Make sure we do not fold in this case.
+define <4 x i32> @shuf_same_length_vec_select(<4 x i1> %cond) {
+; CHECK-LABEL: @shuf_same_length_vec_select(
+; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> [[COND:%.*]], <4 x i32> <i32 poison, i32 1, i32 2, i32 3>, <4 x i32> <i32 poison, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[SEL]], <4 x i32> <i32 poison, i32 9, i32 poison, i32 poison>, <4 x i32> <i32 2, i32 1, i32 3, i32 5>
+; CHECK-NEXT:    ret <4 x i32> [[SHUF]]
+;
+  %sel = select <4 x i1> %cond, <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %shuf = shufflevector <4 x i32> %sel, <4 x i32> <i32 8, i32 9, i32 10, i32 11>, <4 x i32> <i32 2, i32 1, i32 3, i32 5>
+  ret <4 x i32> %shuf
+}
+
+declare i1 @cond()
+declare <4 x i32> @value()
+
+define <4 x i32> @foldphi1() {
+; CHECK-LABEL: @foldphi1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[V:%.*]] = phi <4 x i32> [ zeroinitializer, [[ENTRY:%.*]] ], [ [[XOR:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[VAL:%.*]] = call <4 x i32> @value()
+; CHECK-NEXT:    [[XOR]] = xor <4 x i32> [[V]], [[VAL]]
+; CHECK-NEXT:    [[C:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[SHUF1:%.*]] = shufflevector <4 x i32> [[XOR]], <4 x i32> poison, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
+; CHECK-NEXT:    ret <4 x i32> [[SHUF1]]
+;
+entry:
+  br label %loop
+
+loop:
+  %v = phi <4 x i32> [zeroinitializer, %entry], [%shuf1, %loop]
+
+  %shuf0 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+  %val = call <4 x i32> @value()
+  %xor = xor <4 x i32> %shuf0, %val
+  %shuf1 = shufflevector <4 x i32> %xor, <4 x i32> poison, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
+
+  %c = call i1 @cond()
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret <4 x i32> %shuf1
+}
diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
index acf547b55722fc9..feb4be9e370505c 100644
--- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 
 define i8 @zext_or_icmp_icmp(i8 %a, i8 %b) {
 ; CHECK-LABEL: @zext_or_icmp_icmp(
@@ -168,7 +168,7 @@ define i32 @PR49475(i32 %x, i16 %y) {
 
 ; This would infinite-loop.
 
-define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) {
+define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) "instcombine-no-verify-fixpoint" {
 ; CHECK-LABEL: @PR49475_infloop(
 ; CHECK-NEXT:    [[B2:%.*]] = icmp eq i16 [[INSERT:%.*]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[T0:%.*]], 1
diff --git a/llvm/test/Transforms/InstSimplify/fp-nan.ll b/llvm/test/Transforms/InstSimplify/fp-nan.ll
index bb557500822c143..06b23200bafff81 100644
--- a/llvm/test/Transforms/InstSimplify/fp-nan.ll
+++ b/llvm/test/Transforms/InstSimplify/fp-nan.ll
@@ -237,8 +237,7 @@ define <2 x double> @unary_fneg_nan_2(<2 x double> %x) {
 ; FIXME: This doesn't behave the same way as the fixed-length vectors above
 define <vscale x 1 x double> @unary_fneg_nan_2_scalable_vec_0() {
 ; CHECK-LABEL: @unary_fneg_nan_2_scalable_vec_0(
-; CHECK-NEXT:    [[R:%.*]] = fneg <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0xFFF1234567890ABC, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 1 x double> [[R]]
+; CHECK-NEXT:    ret <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0x7FF1234567890ABC, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer)
 ;
   %r = fneg <vscale x 1 x double> splat (double 0xFFF1234567890ABC)
   ret <vscale x 1 x double> %r
@@ -247,8 +246,7 @@ define <vscale x 1 x double> @unary_fneg_nan_2_scalable_vec_0() {
 ; FIXME: This doesn't behave the same way as the fixed-length vectors above
 define <vscale x 1 x double> @unary_fneg_nan_2_scalable_vec_1() {
 ; CHECK-LABEL: @unary_fneg_nan_2_scalable_vec_1(
-; CHECK-NEXT:    [[R:%.*]] = fneg <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0x7FF0000000000001, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <vscale x 1 x double> [[R]]
+; CHECK-NEXT:    ret <vscale x 1 x double> shufflevector (<vscale x 1 x double> insertelement (<vscale x 1 x double> poison, double 0xFFF0000000000001, i64 0), <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer)
 ;
   %r = fneg <vscale x 1 x double> splat (double 0x7FF0000000000001)
   ret <vscale x 1 x double> %r
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
index 81121019efe767c..76562e80fbc4a19 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll
@@ -24,43 +24,36 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 4
+; CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 3
+; CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP4]], 4
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[INDEX]], -1
-; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[N]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP10:%.*]] = shl i64 [[TMP9]], 3
-; CHECK-NEXT:    [[TMP11:%.*]] = sub i64 1, [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP14:%.*]] = shl i64 [[TMP13]], 3
-; CHECK-NEXT:    [[TMP15:%.*]] = sub i64 0, [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = sub i64 1, [[TMP14]]
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i64 [[TMP16]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP12]], align 8
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP18]], align 8
-; CHECK-NEXT:    [[TMP19:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP20:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP23:%.*]] = shl i64 [[TMP22]], 3
-; CHECK-NEXT:    [[TMP24:%.*]] = sub i64 1, [[TMP23]]
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP24]]
-; CHECK-NEXT:    [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP27:%.*]] = shl i64 [[TMP26]], 3
-; CHECK-NEXT:    [[TMP28:%.*]] = sub i64 0, [[TMP27]]
-; CHECK-NEXT:    [[TMP29:%.*]] = sub i64 1, [[TMP27]]
-; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds double, ptr [[TMP21]], i64 [[TMP28]]
-; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds double, ptr [[TMP30]], i64 [[TMP29]]
-; CHECK-NEXT:    store <vscale x 8 x double> [[TMP19]], ptr [[TMP25]], align 8
-; CHECK-NEXT:    store <vscale x 8 x double> [[TMP20]], ptr [[TMP31]], align 8
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
-; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP7:%.*]] = xor i64 [[INDEX]], -1
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[N]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 0, [[TMP5]]
+; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[TMP13]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP11]], align 8
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP15]], align 8
+; CHECK-NEXT:    [[TMP16:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP17:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], shufflevector (<vscale x 8 x double> insertelement (<vscale x 8 x double> poison, double 1.000000e+00, i64 0), <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP19:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = sub i64 0, [[TMP5]]
+; CHECK-NEXT:    [[TMP22:%.*]] = sub i64 1, [[TMP5]]
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[TMP22]]
+; CHECK-NEXT:    store <vscale x 8 x double> [[TMP16]], ptr [[TMP20]], align 8
+; CHECK-NEXT:    store <vscale x 8 x double> [[TMP17]], ptr [[TMP24]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -75,8 +68,8 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
 ; CHECK-NEXT:    [[I_08_IN:%.*]] = phi i64 [ [[I_08:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[I_08]] = add nsw i64 [[I_08_IN]], -1
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[I_08]]
-; CHECK-NEXT:    [[TMP33:%.*]] = load double, ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP33]], 1.000000e+00
+; CHECK-NEXT:    [[TMP26:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP26]], 1.000000e+00
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[I_08]]
 ; CHECK-NEXT:    store double [[ADD]], ptr [[ARRAYIDX1]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[I_08_IN]], 1
@@ -126,43 +119,36 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP6]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[N]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 4
+; CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 3
+; CHECK-NEXT:    [[TMP9:%.*]] = shl i64 [[TMP7]], 4
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP9:%.*]] = xor i64 [[INDEX]], -1
-; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[N]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP13:%.*]] = shl i64 [[TMP12]], 3
-; CHECK-NEXT:    [[TMP14:%.*]] = sub i64 1, [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP17:%.*]] = shl i64 [[TMP16]], 3
-; CHECK-NEXT:    [[TMP18:%.*]] = sub i64 0, [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = sub i64 1, [[TMP17]]
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i64 [[TMP18]]
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i64 [[TMP19]]
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP15]], align 8
-; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i64>, ptr [[TMP21]], align 8
-; CHECK-NEXT:    [[TMP22:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP23:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD3]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP10]]
-; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP26:%.*]] = shl i64 [[TMP25]], 3
-; CHECK-NEXT:    [[TMP27:%.*]] = sub i64 1, [[TMP26]]
-; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 [[TMP27]]
-; CHECK-NEXT:    [[TMP29:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP30:%.*]] = shl i64 [[TMP29]], 3
-; CHECK-NEXT:    [[TMP31:%.*]] = sub i64 0, [[TMP30]]
-; CHECK-NEXT:    [[TMP32:%.*]] = sub i64 1, [[TMP30]]
-; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 [[TMP31]]
-; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i64, ptr [[TMP33]], i64 [[TMP32]]
-; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP22]], ptr [[TMP28]], align 8
-; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP23]], ptr [[TMP34]], align 8
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT:    [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = xor i64 [[INDEX]], -1
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[N]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 1, [[TMP8]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = sub i64 0, [[TMP8]]
+; CHECK-NEXT:    [[TMP16:%.*]] = sub i64 1, [[TMP8]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP16]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP14]], align 8
+; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i64>, ptr [[TMP18]], align 8
+; CHECK-NEXT:    [[TMP19:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD3]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 1, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP22:%.*]] = sub i64 1, [[TMP8]]
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = sub i64 0, [[TMP8]]
+; CHECK-NEXT:    [[TMP25:%.*]] = sub i64 1, [[TMP8]]
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP24]]
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[TMP26]], i64 [[TMP25]]
+; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP19]], ptr [[TMP23]], align 8
+; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP20]], ptr [[TMP27]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -177,8 +163,8 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-NEXT:    [[I_09_IN:%.*]] = phi i64 [ [[I_09:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[I_09]] = add nsw i64 [[I_09_IN]], -1
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I_09]]
-; CHECK-NEXT:    [[TMP36:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
-; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP36]], 1
+; CHECK-NEXT:    [[TMP29:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP29]], 1
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I_09]]
 ; CHECK-NEXT:    store i64 [[ADD]], ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[I_09_IN]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 9a716f7756072e1..c7bb1ffab23e79e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -54,6 +54,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' {
+; CHECK-NEXT:  Live-in vp<[[VF:%.+]]> = VF
 ; CHECK-NEXT:  Live-in vp<[[VFxUF:%.+]]> = VF * UF
 ; CHECK-NEXT:  Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-NEXT:  vp<[[TC:%.+]]> = original trip-count
@@ -74,11 +75,11 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:      CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
 ; CHECK-NEXT:      CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT:      CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT:      vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx>
+; CHECK-NEXT:      vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
 ; CHECK-NEXT:      WIDEN ir<%1> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT:      WIDEN ir<%add9> = add ir<%1>, ir<1>
 ; CHECK-NEXT:      CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT:      vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3>
+; CHECK-NEXT:      vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]>
 ; CHECK-NEXT:      WIDEN store vp<[[VEC_PTR2]]>, ir<%add9>
 ; CHECK-NEXT:      EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT:      EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@@ -138,6 +139,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT:  Executing best plan with VF=vscale x 4, UF=1
 ; CHECK-NEXT:  VPlan 'Final VPlan for VF={vscale x 4},UF={1}' {
+; CHECK-NEXT:  Live-in vp<[[VF:%.+]]> = VF
 ; CHECK-NEXT:  Live-in vp<[[VFxUF:%.+]]> = VF * UF
 ; CHECK-NEXT:  Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-NEXT:  vp<[[TC:%.+]]> = original trip-count
@@ -158,11 +160,11 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:      CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
 ; CHECK-NEXT:      CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT:      CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT:      vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx>
+; CHECK-NEXT:      vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
 ; CHECK-NEXT:      WIDEN ir<%13> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT:      WIDEN ir<%add9> = add ir<%13>, ir<1>
 ; CHECK-NEXT:      CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT:      vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3>
+; CHECK-NEXT:      vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]>
 ; CHECK-NEXT:      WIDEN store vp<[[VEC_PTR2]]>, ir<%add9>
 ; CHECK-NEXT:      EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT:      EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@@ -259,6 +261,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LV: Scalarizing: %cmp = icmp ugt i64 %indvars.iv, 1
 ; CHECK-NEXT:  LV: Scalarizing: %indvars.iv.next = add nsw i64 %indvars.iv, -1
 ; CHECK-NEXT:  VPlan 'Initial VPlan for VF={vscale x 4},UF>=1' {
+; CHECK-NEXT:  Live-in vp<[[VF:%.+]]> = VF
 ; CHECK-NEXT:  Live-in vp<[[VFxUF:%.+]]> = VF * UF
 ; CHECK-NEXT:  Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-NEXT:  vp<[[TC:%.+]]> = original trip-count
@@ -279,11 +282,11 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:      CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
 ; CHECK-NEXT:      CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT:      CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT:      vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx>
+; CHECK-NEXT:      vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
 ; CHECK-NEXT:      WIDEN ir<%1> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT:      WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00>
 ; CHECK-NEXT:      CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT:      vp<[[VEC_PTR2:%.+]]> = vector-pointer (reverse) ir<%arrayidx3>
+; CHECK-NEXT:      vp<[[VEC_PTR2:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]>
 ; CHECK-NEXT:      WIDEN store vp<[[VEC_PTR2]]>, ir<%conv1>
 ; CHECK-NEXT:      EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT:      EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
@@ -343,6 +346,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
 ; CHECK-NEXT:  Executing best plan with VF=vscale x 4, UF=1
 ; CHECK-NEXT:  VPlan 'Final VPlan for VF={vscale x 4},UF={1}' {
+; CHECK-NEXT:  Live-in vp<[[VF:%.+]]> = VF
 ; CHECK-NEXT:  Live-in vp<[[VFxUF:%.+]]> = VF * UF
 ; CHECK-NEXT:  Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-NEXT:  vp<[[TC:%.+]]> = original trip-count
@@ -363,11 +367,11 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
 ; CHECK-NEXT:      CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
 ; CHECK-NEXT:      CLONE ir<%idxprom> = zext ir<%i.0>
 ; CHECK-NEXT:      CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
-; CHECK-NEXT:      vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx>
+; CHECK-NEXT:      vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
 ; CHECK-NEXT:      WIDEN ir<%13> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT:      WIDEN ir<%conv1> = fadd ir<%13>, ir<1.000000e+00>
 ; CHECK-NEXT:      CLONE ir<%arrayidx3> = getelementptr inbounds ir<%A>, ir<%idxprom>
-; CHECK-NEXT:      vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%arrayidx3>
+; CHECK-NEXT:      vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%arrayidx3>, vp<[[VF]]>
 ; CHECK-NEXT:      WIDEN store vp<[[VEC_PTR]]>, ir<%conv1>
 ; CHECK-NEXT:      EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT:      EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VEC_TC]]>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
index c1cf8b0fc541e74..9a001f36da7d4fb 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
@@ -28,34 +28,30 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
 ; IF-EVL:       vector.body:
 ; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT:    [[TMP5:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
-; IF-EVL-NEXT:    [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 4, i1 true)
+; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
+; IF-EVL-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
 ; IF-EVL-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
-; IF-EVL-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], -1
-; IF-EVL-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP8]]
-; IF-EVL-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 4
-; IF-EVL-NEXT:    [[TMP12:%.*]] = mul i64 0, [[TMP11]]
-; IF-EVL-NEXT:    [[TMP13:%.*]] = sub i64 1, [[TMP11]]
-; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 [[TMP12]]
-; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP13]]
-; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP8]]
-; IF-EVL-NEXT:    [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT:    [[TMP18:%.*]] = mul i64 [[TMP17]], 4
-; IF-EVL-NEXT:    [[TMP19:%.*]] = mul i64 0, [[TMP18]]
-; IF-EVL-NEXT:    [[TMP20:%.*]] = sub i64 1, [[TMP18]]
-; IF-EVL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP19]]
-; IF-EVL-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 [[TMP20]]
-; IF-EVL-NEXT:    [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP22]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP6]] to i64
-; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
+; IF-EVL-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], -1
+; IF-EVL-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]]
+; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 0, [[TMP4]]
+; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 1, [[TMP4]]
+; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP9]]
+; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP10]]
+; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]]
+; IF-EVL-NEXT:    [[TMP14:%.*]] = mul i64 0, [[TMP4]]
+; IF-EVL-NEXT:    [[TMP15:%.*]] = sub i64 1, [[TMP4]]
+; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[TMP14]]
+; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i64 [[TMP15]]
+; IF-EVL-NEXT:    [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; IF-EVL:       middle.block:
 ; IF-EVL-NEXT:    br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
 ; IF-EVL:       scalar.ph:
@@ -131,49 +127,45 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
 ; IF-EVL:       vector.body:
 ; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-NEXT:    [[TMP5:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
-; IF-EVL-NEXT:    [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP5]], i32 4, i1 true)
+; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
+; IF-EVL-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
 ; IF-EVL-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]]
-; IF-EVL-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; IF-EVL-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; IF-EVL-NEXT:    [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32
-; IF-EVL-NEXT:    [[TMP8:%.*]] = add i32 [[OFFSET_IDX3]], 0
+; IF-EVL-NEXT:    [[TMP7:%.*]] = add i32 [[OFFSET_IDX3]], 0
 ; IF-EVL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EVL_BASED_IV]], i64 0
 ; IF-EVL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; IF-EVL-NEXT:    [[TMP9:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; IF-EVL-NEXT:    [[TMP10:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP9]]
-; IF-EVL-NEXT:    [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP10]]
-; IF-EVL-NEXT:    [[TMP11:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1023, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[TMP7]], -1
-; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP8]]
-; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
-; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT:    [[TMP15:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 100, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; IF-EVL-NEXT:    [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> zeroinitializer
-; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP12]]
-; IF-EVL-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP18]], 4
-; IF-EVL-NEXT:    [[TMP20:%.*]] = mul i64 0, [[TMP19]]
-; IF-EVL-NEXT:    [[TMP21:%.*]] = sub i64 1, [[TMP19]]
-; IF-EVL-NEXT:    [[TMP22:%.*]] = getelementptr i32, ptr [[TMP17]], i64 [[TMP20]]
-; IF-EVL-NEXT:    [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP21]]
-; IF-EVL-NEXT:    [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT:    [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP23]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP6]])
-; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT:    [[TMP24:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP12]]
-; IF-EVL-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT:    [[TMP26:%.*]] = mul i64 [[TMP25]], 4
-; IF-EVL-NEXT:    [[TMP27:%.*]] = mul i64 0, [[TMP26]]
-; IF-EVL-NEXT:    [[TMP28:%.*]] = sub i64 1, [[TMP26]]
-; IF-EVL-NEXT:    [[TMP29:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP27]]
-; IF-EVL-NEXT:    [[TMP30:%.*]] = getelementptr i32, ptr [[TMP29]], i64 [[TMP28]]
-; IF-EVL-NEXT:    [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT:    [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP30]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP6]])
-; IF-EVL-NEXT:    [[TMP31:%.*]] = zext i32 [[TMP6]] to i64
-; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP31]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT:    [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; IF-EVL-NEXT:    [[TMP9:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
+; IF-EVL-NEXT:    [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP9]]
+; IF-EVL-NEXT:    [[TMP10:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1023, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[TMP6]], -1
+; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP7]]
+; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
+; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT:    [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 100, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; IF-EVL-NEXT:    [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
+; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]]
+; IF-EVL-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP4]]
+; IF-EVL-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP4]]
+; IF-EVL-NEXT:    [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]]
+; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]]
+; IF-EVL-NEXT:    [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT:    [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]])
+; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT:    [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
+; IF-EVL-NEXT:    [[TMP22:%.*]] = mul i64 0, [[TMP4]]
+; IF-EVL-NEXT:    [[TMP23:%.*]] = sub i64 1, [[TMP4]]
+; IF-EVL-NEXT:    [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]]
+; IF-EVL-NEXT:    [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]]
+; IF-EVL-NEXT:    [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT:    [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP5]])
+; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
+; IF-EVL-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP5]] to i64
+; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP26]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IF-EVL-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; IF-EVL:       middle.block:
 ; IF-EVL-NEXT:    br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
 ; IF-EVL:       scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll
index 870925950ae4984..c492b296903e60a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-uniform-store.ll
@@ -38,10 +38,8 @@ define void @lshift_significand(i32 %n, ptr nocapture writeonly %dst) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub nuw nsw i64 1, [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 2
-; CHECK-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP16]]
-; CHECK-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP9]]
+; CHECK-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP9]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i64, ptr [[TMP14]], i64 [[TMP17]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i64, ptr [[TMP19]], i64 [[TMP18]]
 ; CHECK-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 2 x i64> @llvm.experimental.vp.reverse.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), i32 [[TMP11]])
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 1326751a847d7d5..59db6c197ef8ca2 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -65,7 +65,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT: No successors
 ; IF-EVL-INLOOP-EMPTY:
 ; IF-EVL-INLOOP-NEXT: scalar.ph:
+; IF-EVL-INLOOP-NEXT:   EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
 ; IF-EVL-INLOOP-NEXT: No successors
+; IF-EVL-INLOOP-EMPTY:
+; IF-EVL-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
 ; IF-EVL-INLOOP-NEXT: }
 ;
 
@@ -104,7 +107,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; NO-VP-OUTLOOP-NEXT: No successors
 ; NO-VP-OUTLOOP-EMPTY:
 ; NO-VP-OUTLOOP-NEXT: scalar.ph:
+; NO-VP-OUTLOOP-NEXT:   EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
 ; NO-VP-OUTLOOP-NEXT: No successors
+; NO-VP-OUTLOOP-EMPTY:
+; NO-VP-OUTLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
 ; NO-VP-OUTLOOP-NEXT: }
 ;
 
@@ -143,7 +149,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
 ; NO-VP-INLOOP-NEXT: No successors
 ; NO-VP-INLOOP-EMPTY:
 ; NO-VP-INLOOP-NEXT: scalar.ph:
+; NO-VP-INLOOP-NEXT:   EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
 ; NO-VP-INLOOP-NEXT: No successors
+; NO-VP-INLOOP-EMPTY:
+; NO-VP-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
 ; NO-VP-INLOOP-NEXT: }
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll
index 54bb9352f3c89c6..9899eded7380864 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls-finite.ll
@@ -7,12 +7,10 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-declare float @__expf_finite(float) #0
-
+define void @exp_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32
 ; CHECK: <4 x float> @amd_vrs4_expf
 ; CHECK: ret
-define void @exp_f32(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -25,23 +23,16 @@ for.body:                                         ; preds = %for.body, %entry
   store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
 
 for.end:                                          ; preds = %for.body
   ret void
 }
 
-!1 = distinct !{!1, !2, !3}
-!2 = !{!"llvm.loop.vectorize.width", i32 4}
-!3 = !{!"llvm.loop.vectorize.enable", i1 true}
-
-
-declare double @__exp_finite(double) #0
-
+define void @exp_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f64
 ; CHECK: <4 x double> @amd_vrd4_exp
 ; CHECK: ret
-define void @exp_f64(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -54,25 +45,16 @@ for.body:                                         ; preds = %for.body, %entry
   store double %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !11
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
 
 for.end:                                          ; preds = %for.body
   ret void
 }
 
-!11 = distinct !{!11, !12, !13}
-!12 = !{!"llvm.loop.vectorize.width", i32 4}
-!13 = !{!"llvm.loop.vectorize.enable", i1 true}
-
-
-
-
-declare float @__logf_finite(float) #0
-
+define void @log_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f32
 ; CHECK: <4 x float> @amd_vrs4_logf
 ; CHECK: ret
-define void @log_f32(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -85,23 +67,16 @@ for.body:                                         ; preds = %for.body, %entry
   store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
 
 for.end:                                          ; preds = %for.body
   ret void
 }
 
-!21 = distinct !{!21, !22, !23}
-!22 = !{!"llvm.loop.vectorize.width", i32 4}
-!23 = !{!"llvm.loop.vectorize.enable", i1 true}
-
-
-declare double @__log_finite(double) #0
-
+define void @log_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f64
 ; CHECK: <4 x double> @amd_vrd4_log
 ; CHECK: ret
-define void @log_f64(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -114,23 +89,16 @@ for.body:                                         ; preds = %for.body, %entry
   store double %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
 
 for.end:                                          ; preds = %for.body
   ret void
 }
 
-!31 = distinct !{!31, !32, !33}
-!32 = !{!"llvm.loop.vectorize.width", i32 4}
-!33 = !{!"llvm.loop.vectorize.enable", i1 true}
-
-
-declare float @__powf_finite(float, float) #0
-
+define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32
 ; CHECK: <4 x float> @amd_vrs4_powf
 ; CHECK: ret
-define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
 entry:
   br label %for.body
 
@@ -145,23 +113,16 @@ for.body:                                         ; preds = %for.body, %entry
   store float %tmp2, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !41
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
 
 for.end:                                          ; preds = %for.body
   ret void
 }
 
-!41 = distinct !{!41, !42, !43}
-!42 = !{!"llvm.loop.vectorize.width", i32 4}
-!43 = !{!"llvm.loop.vectorize.enable", i1 true}
-
-
-declare double @__pow_finite(double, double) #0
-
+define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f64
 ; CHECK: <4 x double> @amd_vrd4_pow
 ; CHECK: ret
-define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
 entry:
   br label %for.body
 
@@ -176,18 +137,12 @@ for.body:                                         ; preds = %for.body, %entry
   store double %tmp2, ptr %arrayidx2, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
 
 for.end:                                          ; preds = %for.body
   ret void
 }
 
-!51 = distinct !{!51, !52, !53}
-!52 = !{!"llvm.loop.vectorize.width", i32 4}
-!53 = !{!"llvm.loop.vectorize.enable", i1 true}
-
-declare float @__exp2f_finite(float) #0
-
 define void @exp2f_finite(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2f_finite(
 ; CHECK:    call <4 x float> @amd_vrs4_exp2f(<4 x float> %{{.*}})
@@ -205,18 +160,12 @@ for.body:
   store float %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !61
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
 
 for.end:
   ret void
 }
 
-!61 = distinct !{!61, !62, !63}
-!62 = !{!"llvm.loop.vectorize.width", i32 4}
-!63 = !{!"llvm.loop.vectorize.enable", i1 true}
-
-declare double @__exp2_finite(double) #0
-
 define void @exp2_finite(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_finite(
 ; CHECK:    call <4 x double> @amd_vrd4_exp2(<4 x double> {{.*}})
@@ -234,22 +183,16 @@ for.body:
   store double %call, ptr %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
   %exitcond = icmp eq i64 %iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
 
 for.end:
   ret void
 }
 
-!71 = distinct !{!71, !72, !73}
-!72 = !{!"llvm.loop.vectorize.width", i32 4}
-!73 = !{!"llvm.loop.vectorize.enable", i1 true}
-
-declare float @__log2f_finite(float) #0
-
+define void @log2_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f32
 ; CHECK: <4 x float> @amd_vrs4_log2f
 ; CHECK: ret
-define void @log2_f32(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -262,23 +205,16 @@ for.body:                                         ; preds = %for.body, %entry
   store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
 
 for.end:                                          ; preds = %for.body
   ret void
 }
 
-!81 = distinct !{!21, !22, !23}
-!82 = !{!"llvm.loop.vectorize.width", i32 4}
-!83 = !{!"llvm.loop.vectorize.enable", i1 true}
-
-
-declare double @__log2_finite(double) #0
-
+define void @log2_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f64
 ; CHECK: <4 x double> @amd_vrd4_log2
 ; CHECK: ret
-define void @log2_f64(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -291,22 +227,16 @@ for.body:                                         ; preds = %for.body, %entry
   store double %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
 
 for.end:                                          ; preds = %for.body
   ret void
 }
 
-!91 = distinct !{!31, !32, !33}
-!92 = !{!"llvm.loop.vectorize.width", i32 4}
-!93 = !{!"llvm.loop.vectorize.enable", i1 true}
-
-declare float @__log10f_finite(float) #0
-
+define void @log10_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f32
 ; CHECK: <4 x float> @amd_vrs4_log10f
 ; CHECK: ret
-define void @log10_f32(ptr nocapture %varray) {
 entry:
   br label %for.body
 
@@ -319,14 +249,173 @@ for.body:                                         ; preds = %for.body, %entry
   store float %call, ptr %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 1000
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !21
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+define void @log10_finite(ptr nocapture %varray) {
+; CHECK-LABEL: @log10_finite(
+; CHECK:    call <2 x double> @amd_vrd2_log10(<2 x double> {{.*}})
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @__log10_finite(double %conv)
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+  ret void
+}
+
+define void @exp10_finite(ptr nocapture %varray) {
+; CHECK-LABEL: @exp10_finite(
+; CHECK:    call <2 x double> @amd_vrd2_exp10(<2 x double> {{.*}})
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @__exp10_finite(double %conv)
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+  ret void
+}
+
+define void @exp10_f32(ptr nocapture %varray) {
+; CHECK-LABEL: @exp10_f32
+; CHECK: <4 x float> @amd_vrs4_exp10f
+; CHECK: ret
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call fast float @__exp10f_finite(float %conv)
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
 
 for.end:                                          ; preds = %for.body
   ret void
 }
 
-!101 = distinct !{!21, !22, !23}
-!102 = !{!"llvm.loop.vectorize.width", i32 4}
-!103 = !{!"llvm.loop.vectorize.enable", i1 true}
+define void @asin_finite(ptr nocapture %varray) {
+; CHECK-LABEL: @asin_finite(
+; CHECK:    call <8 x double> @amd_vrd8_asin(<8 x double> {{.*}})
+; CHECK:    ret void
+;
+entry:
+  br label %for.body
 
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @__asin_finite(double %conv)
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
+
+for.end:
+  ret void
+}
 
+define void @asinf_finite(ptr nocapture %varray) {
+; CHECK-LABEL: @asinf_finite
+; CHECK: <4 x float> @amd_vrs4_asinf
+; CHECK: ret
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call fast float @__asinf_finite(float %conv)
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+define void @acosf_finite(ptr nocapture %varray) {
+; CHECK-LABEL: @acosf_finite
+; CHECK: <4 x float> @amd_vrs4_acosf
+; CHECK: ret
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %tmp = trunc i64 %indvars.iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call fast float @__acosf_finite(float %conv)
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+  store float %call, ptr %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!1 = distinct !{!1, !2, !3}
+!2 = !{!"llvm.loop.vectorize.width", i32 2}
+!3 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+!4 = distinct !{!4, !5, !6}
+!5 = !{!"llvm.loop.vectorize.width", i32 4}
+!6 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+!7 = distinct !{!7, !8, !9}
+!8 = !{!"llvm.loop.vectorize.width", i32 8}
+!9 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+declare float @__expf_finite(float) #0
+declare double @__exp_finite(double) #0
+declare double @__log_finite(double) #0
+declare float @__logf_finite(float) #0
+declare float @__powf_finite(float, float) #0
+declare double @__pow_finite(double, double) #0
+declare float @__exp2f_finite(float) #0
+declare double @__exp2_finite(double) #0
+declare float @__log2f_finite(float) #0
+declare double @__log2_finite(double) #0
+declare float @__log10f_finite(float) #0
+declare double @__log10_finite(double) #0
+declare double @__exp10_finite(double) #0
+declare float @__exp10f_finite(float) #0
+declare double @__asin_finite(double) #0
+declare float @__asinf_finite(float) #0
+declare float @__acosf_finite(float) #0
\ No newline at end of file
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
index 4acc7fe7eaccf61..4ced0372e5da381 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
@@ -1444,6 +1444,32 @@ for.end:
   ret void
 }
 
+define void @log10_f64(ptr nocapture %varray) {
+; CHECK-LABEL: @log10_f64(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log10(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.log10.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.log10.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log10.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK:        ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @log10(double %conv)
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
 define void @log10_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f32(
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log10.v2f32(<2 x float> [[TMP4:%.*]])
@@ -1470,6 +1496,32 @@ for.end:
   ret void
 }
 
+define void @log10_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @log10_f64_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log10(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.log10.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.log10.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log10.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK:        ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.log10.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
 define void @log10_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f32_intrinsic(
 ; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log10.v2f32(<2 x float> [[TMP4:%.*]])
@@ -1600,4 +1652,168 @@ for.end:
   ret void
 }
 
+define void @exp10_f64(ptr nocapture %varray) {
+; CHECK-LABEL: @exp10_f64(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp10(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4:    call <4 x double> @llvm.exp10.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8:    call <8 x double> @llvm.exp10.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:    call <16 x double> @llvm.exp10.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK:        ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @exp10(double %conv)
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @exp10_f32(ptr nocapture %varray) {
+; CHECK-LABEL: @exp10_f32(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[TMP4:%.*]])
+; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp10f(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @llvm.exp10.v8f32(<8 x float> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.exp10.v16f32(<16 x float> [[TMP4:%.*]])
+; CHECK:        ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @exp10f(float %conv)
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @exp10_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @exp10_f64_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp10(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.exp10.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.exp10.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16:    [[TMP5:%.*]] = call <16 x double> @llvm.exp10.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK:        ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @llvm.exp10.f64(double %conv)
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @exp10_f32_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @exp10_f32_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[TMP4:%.*]])
+; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp10f(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @llvm.exp10.v8f32(<8 x float> [[TMP4:%.*]])
+; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.exp10.v16f32(<16 x float> [[TMP4:%.*]])
+; CHECK:        ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @llvm.exp10.f32(float %conv)
+  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+  store float %call, ptr %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+
+define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; CHECK-LABEL: define void @sincos_f64
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]])
+; CHECK-VF2-NOT:    call void @amd_vrd2_sincos(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; CHECK-VF4-NOT:    call void @amd_vrd4_sincos(<4 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; CHECK-VF8-NOT:    call void @amd_vrd8_sincos(<8 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; CHECK:        ret void
+; 
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepa = getelementptr double, ptr %a, i64 %indvars.iv
+  %num = load double, ptr %gepa, align 8
+  %gepb = getelementptr double, ptr %b, i64 %indvars.iv
+  %gepc = getelementptr double, ptr %c, i64 %indvars.iv
+  call void @sincos(double %num, ptr %gepb, ptr %gepc)
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
+; CHECK-LABEL: define void @sincos_f32
+; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]])
+; CHECK-VF4-NOT:    call void @amd_vrs4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; CHECK-VF8-NOT:    call void @amd_vrs8_sincosf(<8 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; CHECK-VF16-NOT:    call void @amd_vrs16_sincosf(<16 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
+; CHECK:        ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepa = getelementptr float, ptr %a, i64 %indvars.iv
+  %num = load float, ptr %gepa, align 8
+  %gepb = getelementptr float, ptr %b, i64 %indvars.iv
+  %gepc = getelementptr float, ptr %c, i64 %indvars.iv
+  call void @sincosf(float %num, ptr %gepb, ptr %gepc)
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
 attributes #0 = { nounwind readnone }
+
+declare double @exp10(double) #0
+declare float @exp10f(float) #0
+declare double @llvm.exp10.f64(double) #0
+declare float @llvm.exp10.f32(float) #0
+declare void @sincos(double, ptr, ptr)
+declare void @sincosf(float, ptr, ptr)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 73647919aac3602..29e54fabad0c1bb 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -1037,6 +1037,71 @@ exit:
   ret i64 %red.mul
 }
 
+; Test case for https://github.com/llvm/llvm-project/issues/113526.
+define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 {
+; CHECK-LABEL: @narrowed_reduction(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP:%.*]] to i32
+; CHECK-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = and <16 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <16 x i32> [[VEC_PHI1]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = or <16 x i32> [[TMP0]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i32> [[TMP1]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc <16 x i32> [[TMP2]] to <16 x i1>
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
+; CHECK-NEXT:    [[TMP6]] = zext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT:    [[TMP7]] = zext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP9:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i1>
+; CHECK-NEXT:    [[TMP10:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i1>
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <16 x i1> [[TMP10]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[BIN_RDX]])
+; CHECK-NEXT:    [[TMP12:%.*]] = zext i1 [[TMP11]] to i32
+; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[OR13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[OR13]], 1
+; CHECK-NEXT:    [[OR]] = or i32 [[AND]], [[CONV]]
+; CHECK-NEXT:    [[INC]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV]], 0
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i32 [[OR_LCSSA]]
+;
+entry:
+  %conv = zext i1 %cmp to i32
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 1, %entry ], [ %inc, %loop ]
+  %or13 = phi i32 [ 0, %entry ], [ %or, %loop ]
+  %and = and i32 %or13, 1
+  %or = or i32 %and, %conv
+  %inc = add i32 %iv, 1
+  %ec = icmp eq i32 %iv, 0
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret i32 %or
+}
+
 declare void @llvm.assume(i1 noundef) #0
 
 attributes #0 = { "target-cpu"="penryn" }
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
index 8e56614a2e3d5c7..b05980bef1b38f2 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -232,9 +232,11 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
 ; CHECK-NEXT:   EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
+; CHECK-NEXT:   EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234>
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
+; CHECK-NEXT: Live-out i32 %and.red = vp<[[RESUME_RED]]>
 ; CHECK-NEXT: }
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
index c1322792071e45a..d983c5138164fc7 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -20,11 +20,11 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) {
 ; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP0]], -1
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP0]], -1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 -4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -93,11 +93,11 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) {
 ; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i128 [[STARTVAL]], [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i128 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = add i128 [[TMP0]], -1
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = add i128 [[TMP0]], -1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 -4
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -176,11 +176,11 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) {
 ; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 [[STARTVAL]], [[DOTCAST]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = add i16 [[TMP4]], -1
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = add i16 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP7]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 -4
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 -4
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 -3
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 0dde507d08be747..2247295295663e1 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -165,7 +165,10 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:   EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
 ; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
 ; CHECK-NEXT: }
 ;
 entry:
@@ -221,7 +224,10 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:   EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
 ; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
 ; CHECK-NEXT: }
 ;
 entry:
@@ -447,7 +453,10 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
 ; CHECK-NEXT: No successors
 ; CHECK-EMPTY:
 ; CHECK-NEXT: scalar.ph
+; CHECK-NEXT:   EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
 ; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: Live-out float %sum.07 = vp<[[RED_RESUME]]>
 ; CHECK-NEXT:}
 
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
index 0f3cd9d4ca4d614..446b720ad1ba492 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -1101,6 +1101,7 @@ exit:
 define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
 ; CHECK-LABEL: LV: Checking a loop in 'ptr_induction_remove_dead_recipe'
 ; CHECK:       VPlan 'Initial VPlan for VF={2},UF>=1' {
+; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
 ; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
 ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
 ; CHECK-NEXT: vp<[[TC:%.+]]> = original trip-count
@@ -1115,11 +1116,11 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) {
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT:   vector.body:
 ; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
-; CHECK-NEXT:     vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<0> + vp<%3> * ir<-1>
+; CHECK-NEXT:     vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<-1>
 ; CHECK-NEXT:     vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1>
 ; CHECK-NEXT:     EMIT vp<[[PTR_IV:%.+]]> = ptradd ir<%start>, vp<[[STEPS]]>
 ; CHECK-NEXT:     CLONE ir<%ptr.iv.next> = getelementptr inbounds vp<[[PTR_IV]]>, ir<-1>
-; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer (reverse) ir<%ptr.iv.next>
+; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = reverse-vector-pointer inbounds ir<%ptr.iv.next>, vp<[[VF]]>
 ; CHECK-NEXT:     WIDEN ir<%l> = load vp<[[VEC_PTR]]>
 ; CHECK-NEXT:     WIDEN ir<%c.1> = icmp eq ir<%l>, ir<0>
 ; CHECK-NEXT:     EMIT vp<[[NEG:%.+]]> = not ir<%c.1>
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll
new file mode 100644
index 000000000000000..75cebae0b82971a
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll
@@ -0,0 +1,99 @@
+;; Test to make sure we don't fail when cloning in a case where we end up with
+;; a clone that has fewer edges than the node it was initially cloned from.
+;; This test was reduced and simplified from xalancbmk with some random hotness
+;; applied to the profile that reproduced the issue.
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN:		-memprof-verify-ccg -memprof-verify-nodes \
+; RUN: 		-pass-remarks=memprof-context-disambiguation %s -S 2>&1 | FileCheck %s
+
+;; Make sure we created some clones
+; CHECK: created clone A.memprof.1
+; CHECK: created clone C.memprof.1
+; CHECK: created clone D.memprof.1
+; CHECK: created clone E.memprof.1
+; CHECK: created clone B.memprof.1
+; CHECK: created clone F.memprof.1
+; CHECK: created clone G.memprof.1
+
+; ModuleID = '<stdin>'
+source_filename = "reduced.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+define void @A() {
+  call void @B(), !callsite !0
+  ret void
+}
+
+define void @C() {
+  call void @D(), !callsite !1
+  ret void
+}
+
+define void @D() {
+  call void @A(), !callsite !2
+  ret void
+}
+
+define void @E() {
+  %1 = call ptr @_Znwm(i64 0), !memprof !3, !callsite !20
+  ret void
+}
+
+define void @B() {
+  call void @F(), !callsite !21
+  ret void
+}
+
+define void @F() {
+  call void @E(), !callsite !22
+  call void @G(), !callsite !23
+  ret void
+}
+
+define void @G() {
+  %1 = call ptr @_Znwm(i64 0), !memprof !24, !callsite !37
+  ret void
+}
+
+declare ptr @_Znwm(i64)
+
+!0 = !{i64 1995602625719775354}
+!1 = !{i64 4312698517630782220}
+!2 = !{i64 5516454029445989383}
+!3 = !{!4, !6, !8, !10, !12, !14, !16, !18}
+!4 = !{!5, !"notcold"}
+!5 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 1995602625719775354}
+!6 = !{!7, !"cold"}
+!7 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 2077908580042347045, i64 4312698517630782220, i64 5379466077518675850}
+!8 = !{!9, !"cold"}
+!9 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 2077908580042347045, i64 4312698517630782220, i64 -7632894069000375689}
+!10 = !{!11, !"cold"}
+!11 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 2939944783060497247}
+!12 = !{!13, !"notcold"}
+!13 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 5642549674080861567, i64 5516454029445989383, i64 4312698517630782220, i64 -7632894069000375689}
+!14 = !{!15, !"cold"}
+!15 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 5642549674080861567, i64 5516454029445989383, i64 4312698517630782220, i64 -1805555115991223293}
+!16 = !{!17, !"notcold"}
+!17 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 -4746997736434041076, i64 5516454029445989383, i64 4312698517630782220, i64 -1805555115991223293}
+!18 = !{!19, !"notcold"}
+!19 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862, i64 7147584705143805656, i64 -6456074186195384663, i64 -4637272929643682959}
+!20 = !{i64 -2282665745786859978, i64 -6758300505622211768, i64 2938340307832638862}
+!21 = !{i64 -6456074186195384663}
+!22 = !{i64 7147584705143805656}
+!23 = !{i64 3938822378769440754}
+!24 = !{!25, !27, !29, !31, !33, !35}
+!25 = !{!26, !"cold"}
+!26 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196, i64 3938822378769440754, i64 -6456074186195384663, i64 1995602625719775354, i64 5516454029445989383, i64 4312698517630782220, i64 -1805555115991223293}
+!27 = !{!28, !"notcold"}
+!28 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196, i64 3938822378769440754, i64 -6456074186195384663, i64 2077908580042347045, i64 4312698517630782220, i64 -7632894069000375689}
+!29 = !{!30, !"cold"}
+!30 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196, i64 3938822378769440754, i64 -6456074186195384663, i64 -4746997736434041076, i64 5516454029445989383, i64 4312698517630782220, i64 -7632894069000375689}
+!31 = !{!32, !"notcold"}
+!32 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196, i64 3938822378769440754, i64 -6456074186195384663, i64 -4746997736434041076, i64 5516454029445989383, i64 4312698517630782220, i64 -1805555115991223293}
+!33 = !{!34, !"cold"}
+!34 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196, i64 3938822378769440754, i64 -6456074186195384663, i64 -4637272929643682959}
+!35 = !{!36, !"notcold"}
+!36 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196, i64 3938822378769440754, i64 -6456074186195384663, i64 -4409412896859835674}
+!37 = !{i64 -2282665745786859978, i64 -3548918226713766361, i64 4077289288013931196}
diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
index e5d62319bf9db7b..b5f29906b051eed 100644
--- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
+++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
@@ -80,8 +80,8 @@ lpad:
 }
 
 define i8 @invoke_with_same_range() personality ptr undef {
-; CHECK-LABEL: @invoke_with_same_range()
-; CHECK: tail call i8 @invoke_with_range()
+; CHECK-DAG: @invoke_with_same_range()
+; CHECK-DAG: tail call i8 @invoke_with_range()
   %out = invoke range(i8 0, 2) i8 @dummy() to label %next unwind label %lpad
 
 next:
@@ -93,15 +93,15 @@ lpad:
 }
 
 define i8 @call_with_same_range() {
-; CHECK-LABEL: @call_with_same_range
-; CHECK: tail call i8 @call_with_range
+; CHECK-DAG: @call_with_same_range()
+; CHECK-DAG: tail call i8 @call_with_range()
   %out = call range(i8 0, 2) i8 @dummy()
   ret i8 %out
 }
 
 define i8 @call_with_same_range_attr(i8 range(i8 0, 2) %v) {
-; CHECK-LABEL: @call_with_same_range_attr
-; CHECK: tail call i8 @call_with_range_attr
+; CHECK-DAG: @call_with_same_range_attr
+; CHECK-DAG: tail call i8 @call_with_range_attr
   %out = call i8 @dummy2(i8 %v)
   ret i8 %out
 }
diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
index e7718ca84d31657..39e5a11181a4f03 100644
--- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
+++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
@@ -64,8 +64,8 @@ lpad:
 }
 
 define i8 @invoke_with_same_range() personality ptr undef {
-; CHECK-LABEL: @invoke_with_same_range()
-; CHECK: tail call i8 @invoke_with_range()
+; CHECK-DAG: @invoke_with_same_range()
+; CHECK-DAG: tail call i8 @invoke_with_range()
   %out = invoke i8 @dummy() to label %next unwind label %lpad, !range !0
 
 next:
@@ -77,8 +77,8 @@ lpad:
 }
 
 define i8 @call_with_same_range() {
-; CHECK-LABEL: @call_with_same_range
-; CHECK: tail call i8 @call_with_range
+; CHECK-DAG: @call_with_same_range
+; CHECK-DAG: tail call i8 @call_with_range
   bitcast i8 0 to i8
   %out = call i8 @dummy(), !range !0
   ret i8 %out
diff --git a/llvm/test/Transforms/MergeFunc/inline-asm.ll b/llvm/test/Transforms/MergeFunc/inline-asm.ll
index 7cc6afd2f8f7bdc..970757e8d53afbb 100644
--- a/llvm/test/Transforms/MergeFunc/inline-asm.ll
+++ b/llvm/test/Transforms/MergeFunc/inline-asm.ll
@@ -3,11 +3,11 @@
 ; CHECK-LABEL: @int_ptr_arg_different
 ; CHECK-NEXT: call void asm
 
-; CHECK-LABEL: @int_ptr_null
-; CHECK-NEXT: tail call void @float_ptr_null()
+; CHECK-DAG: @int_ptr_null
+; CHECK-DAG: tail call void @float_ptr_null()
 
-; CHECK-LABEL: @int_ptr_arg_same
-; CHECK-NEXT: tail call void @float_ptr_arg_same(ptr %0)
+; CHECK-DAG: @int_ptr_arg_same
+; CHECK-DAG: tail call void @float_ptr_arg_same(ptr %0)
 
 ; Used to satisfy minimum size limit
 declare void @stuff()
diff --git a/llvm/test/Transforms/PGOProfile/chr.ll b/llvm/test/Transforms/PGOProfile/chr.ll
index 34e39fe37979ac1..46f9a2bde7a2352 100644
--- a/llvm/test/Transforms/PGOProfile/chr.ll
+++ b/llvm/test/Transforms/PGOProfile/chr.ll
@@ -1,8 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes='require<profile-summary>,function(chr,instcombine<no-verify-fixpoint>,simplifycfg)' -S | FileCheck %s
-
-; FIXME: This does not currently reach a fix point, because we don't make use
-; of a freeze that is pushed up the instruction chain later.
+; RUN: opt < %s -passes='require<profile-summary>,function(chr,instcombine,simplifycfg)' -S | FileCheck %s
 
 declare void @foo()
 declare void @bar()
@@ -1910,6 +1907,9 @@ bb4:
   ret i32 %v13
 }
 
+; FIXME: test_chr_21 does not currently reach a fixpoint in instcombine, because
+; we don't make use of a freeze that is pushed up the instruction chain later.
+
 ; Test the case where two scopes share a common instruction to hoist (%cmp.i).
 ; Two scopes would hoist it to their hoist points, but since the outer scope
 ; hoists (entry/bb6-9) it first to its hoist point, it'd be wrong (causing bad
@@ -1928,7 +1928,7 @@ bb4:
 ;     foo();
 ;  }
 ;  return 45;
-define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 {
+define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) "instcombine-no-verify-fixpoint" !prof !14 {
 ; CHECK-LABEL: @test_chr_21(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[I_FR:%.*]] = freeze i64 [[I:%.*]]
diff --git a/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll b/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll
new file mode 100644
index 000000000000000..ab8cf8c010812b3
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/instr-gen-cold-function.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s  --passes=pgo-instr-gen -pgo-instrument-cold-function-only -pgo-function-entry-coverage -S  | FileCheck --check-prefixes=COLD %s
+; RUN: opt < %s  --passes=pgo-instr-gen -pgo-instrument-cold-function-only -pgo-function-entry-coverage -pgo-cold-instrument-entry-threshold=1 -S  | FileCheck --check-prefixes=ENTRY-COUNT %s
+; RUN: opt < %s  --passes=pgo-instr-gen -pgo-instrument-cold-function-only -pgo-function-entry-coverage -pgo-treat-unknown-as-cold -S  | FileCheck --check-prefixes=UNKNOWN-FUNC %s
+
+; COLD: call void @llvm.instrprof.cover(ptr @__profn_foo, i64  [[#]], i32 1, i32 0)
+; COLD-NOT: __profn_main
+; COLD-NOT: __profn_bar
+
+; ENTRY-COUNT: call void @llvm.instrprof.cover(ptr @__profn_foo, i64  [[#]], i32 1, i32 0)
+; ENTRY-COUNT: call void @llvm.instrprof.cover(ptr @__profn_main, i64 [[#]], i32 1, i32 0)
+
+; UNKNOWN-FUNC: call void @llvm.instrprof.cover(ptr @__profn_bar, i64  [[#]], i32 1, i32 0)
+; UNKNOWN-FUNC: call void @llvm.instrprof.cover(ptr @__profn_foo, i64  [[#]], i32 1, i32 0)
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @bar() { ; no !prof attachment, so no entry count is recorded for this function
+entry: ; the -pgo-treat-unknown-as-cold run expects __profn_bar; the default run expects it absent
+  ret void
+}
+
+define void @foo() !prof !0 { ; !0 records function_entry_count 0
+entry: ; all three runs above expect an llvm.instrprof.cover call for __profn_foo
+  ret void
+}
+
+define i32 @main() !prof !1 { ; !1 records function_entry_count 1
+entry: ; the -pgo-cold-instrument-entry-threshold=1 run expects __profn_main; the default run expects it absent
+  ret i32 0
+}
+
+!0 = !{!"function_entry_count", i64 0} ; attached to @foo
+!1 = !{!"function_entry_count", i64 1} ; attached to @main
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
index e1457ca7251ed88..205eeb8878989d2 100644
--- a/llvm/test/Transforms/PGOProfile/memprof.ll
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -66,6 +66,18 @@
 ;; Check that the total sizes are reported if requested.
 ; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdata>' -pgo-warn-missing-function -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s --check-prefixes=TOTALSIZES
 
+;; Make sure we emit a random hotness seed if requested.
+; RUN: llvm-profdata merge -memprof-random-hotness %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand 2>&1 | FileCheck %s --check-prefix=RAND
+; RAND: random hotness seed =
+;; Can't check the exact values, but make sure applying the random profile
+;; succeeds with the same stats.
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdatarand>' -pgo-warn-missing-function -S -stats 2>&1 | FileCheck %s --check-prefixes=ALL,MEMPROFONLY,MEMPROFSTATS
+
+;; Make sure we use a specific random hotness seed if requested.
+; RUN: llvm-profdata merge -memprof-random-hotness -memprof-random-hotness-seed=1730170724 %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand2 2>&1 | FileCheck %s --check-prefix=RAND2
+; RAND2: random hotness seed = 1730170724
+; RUN: opt < %s -passes='memprof-use<profile-filename=%t.memprofdatarand2>' -pgo-warn-missing-function -S -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROFRAND2,ALL,MEMPROFONLY,MEMPROFSTATS
+
 ; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched
 ; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched
 ; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched
@@ -372,6 +384,13 @@ for.end:                                          ; preds = %for.cond
 ; MEMPROFNOCOLINFO: ![[C10]] = !{i64 -4535090212904553409}
 ; MEMPROFNOCOLINFO: ![[C11]] = !{i64 3577763375057267810}
 
+;; For the specific random seed, this is the expected order of hotness
+; MEMPROFRAND2: !"cold"
+; MEMPROFRAND2: !"cold"
+; MEMPROFRAND2: !"cold"
+; MEMPROFRAND2: !"hot"
+; MEMPROFRAND2: !"hot"
+
 ; MEMPROFSTATS:  8 memprof - Number of alloc contexts in memory profile.
 ; MEMPROFSTATS: 10 memprof - Number of callsites in memory profile.
 ; MEMPROFSTATS:  6 memprof - Number of functions having valid memory profile.
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
new file mode 100644
index 000000000000000..7274e952567693d
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/predicated-reduction.ll
@@ -0,0 +1,294 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes="default<O3>" -S < %s  | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64"
+
+define nofpclass(nan inf) double @monte_simple(i32 noundef %nblocks, i32 noundef %RAND_BLOCK_LENGTH, ptr noundef %samples, double noundef nofpclass(nan inf) %Y, double noundef nofpclass(nan inf) %Z) {
+; CHECK-LABEL: define nofpclass(nan inf) double @monte_simple(
+; CHECK-SAME: i32 noundef [[NBLOCKS:%.*]], i32 noundef [[RAND_BLOCK_LENGTH:%.*]], ptr nocapture noundef readonly [[SAMPLES:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[RAND_BLOCK_LENGTH]], 0
+; CHECK-NEXT:    br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[V1_011:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V1_1:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[V0_010:%.*]] = phi double [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[V0_1:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = fpext float [[TMP0]] to double
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[Y]], [[CONV]]
+; CHECK-NEXT:    [[SUB:%.*]] = fsub fast double [[MUL]], [[Z]]
+; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast ogt double [[SUB]], 0.000000e+00
+; CHECK-NEXT:    [[ADD:%.*]] = fadd fast double [[SUB]], [[V0_010]]
+; CHECK-NEXT:    [[MUL3:%.*]] = fmul fast double [[SUB]], [[SUB]]
+; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast double [[MUL3]], [[V1_011]]
+; CHECK-NEXT:    [[V0_1]] = select i1 [[CMP1]], double [[ADD]], double [[V0_010]]
+; CHECK-NEXT:    [[V1_1]] = select i1 [[CMP1]], double [[ADD4]], double [[V1_011]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
+; CHECK:       [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast double [[V1_1]], [[V0_1]]
+; CHECK-NEXT:    br label %[[FOR_END]]
+; CHECK:       [[FOR_END]]:
+; CHECK-NEXT:    [[ADD5:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[TMP1]], %[[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    ret double [[ADD5]]
+;
+entry: ; unoptimized input: every parameter and local is given its own alloca
+  %nblocks.addr = alloca i32, align 4
+  %RAND_BLOCK_LENGTH.addr = alloca i32, align 4
+  %samples.addr = alloca ptr, align 8
+  %Y.addr = alloca double, align 8
+  %Z.addr = alloca double, align 8
+  %i = alloca i32, align 4
+  %block = alloca i32, align 4 ; only referenced by its lifetime markers in this function
+  %rngVal = alloca double, align 8
+  %callValue = alloca double, align 8
+  %v0 = alloca double, align 8
+  %v1 = alloca double, align 8
+  store i32 %nblocks, ptr %nblocks.addr, align 4 ; %nblocks.addr is never loaded in this function
+  store i32 %RAND_BLOCK_LENGTH, ptr %RAND_BLOCK_LENGTH.addr, align 4
+  store ptr %samples, ptr %samples.addr, align 8
+  store double %Y, ptr %Y.addr, align 8
+  store double %Z, ptr %Z.addr, align 8
+  call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2
+  call void @llvm.lifetime.start.p0(i64 4, ptr %block) #2
+  call void @llvm.lifetime.start.p0(i64 8, ptr %rngVal) #2
+  call void @llvm.lifetime.start.p0(i64 8, ptr %callValue) #2
+  call void @llvm.lifetime.start.p0(i64 8, ptr %v0) #2
+  store double 0.000000e+00, ptr %v0, align 8 ; v0 = 0.0
+  call void @llvm.lifetime.start.p0(i64 8, ptr %v1) #2
+  store double 0.000000e+00, ptr %v1, align 8 ; v1 = 0.0
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, ptr %i, align 4
+  %1 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end ; loop while i < RAND_BLOCK_LENGTH
+
+for.body:                                         ; preds = %for.cond
+  %2 = load ptr, ptr %samples.addr, align 8
+  %3 = load i32, ptr %i, align 4
+  %idxprom = sext i32 %3 to i64
+  %arrayidx = getelementptr inbounds float, ptr %2, i64 %idxprom
+  %4 = load float, ptr %arrayidx, align 4
+  %conv = fpext float %4 to double
+  store double %conv, ptr %rngVal, align 8
+  %5 = load double, ptr %Y.addr, align 8
+  %6 = load double, ptr %rngVal, align 8
+  %mul = fmul fast double %5, %6
+  %7 = load double, ptr %Z.addr, align 8
+  %sub = fsub fast double %mul, %7
+  store double %sub, ptr %callValue, align 8 ; callValue = Y * (double)samples[i] - Z
+  %8 = load double, ptr %callValue, align 8
+  %cmp1 = fcmp fast ogt double %8, 0.000000e+00
+  br i1 %cmp1, label %if.then, label %if.end ; both accumulators are updated only when callValue > 0
+
+if.then:                                          ; preds = %for.body
+  %9 = load double, ptr %callValue, align 8
+  %10 = load double, ptr %v0, align 8
+  %add = fadd fast double %10, %9
+  store double %add, ptr %v0, align 8 ; v0 += callValue
+  %11 = load double, ptr %callValue, align 8
+  %12 = load double, ptr %callValue, align 8
+  %mul3 = fmul fast double %11, %12
+  %13 = load double, ptr %v1, align 8
+  %add4 = fadd fast double %13, %mul3
+  store double %add4, ptr %v1, align 8 ; v1 += callValue * callValue
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %14 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %14, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %15 = load double, ptr %v0, align 8
+  %16 = load double, ptr %v1, align 8
+  %add5 = fadd fast double %15, %16 ; result is v0 + v1
+  call void @llvm.lifetime.end.p0(i64 8, ptr %v1) #2
+  call void @llvm.lifetime.end.p0(i64 8, ptr %v0) #2
+  call void @llvm.lifetime.end.p0(i64 8, ptr %callValue) #2
+  call void @llvm.lifetime.end.p0(i64 8, ptr %rngVal) #2
+  call void @llvm.lifetime.end.p0(i64 4, ptr %block) #2
+  call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2
+  ret double %add5
+}
+
+define nofpclass(nan inf) double @monte_exp(i32 noundef %nblocks, i32 noundef %RAND_BLOCK_LENGTH, ptr noundef %samples, double noundef nofpclass(nan inf) %Y, double noundef nofpclass(nan inf) %Z) {
+; CHECK-LABEL: define nofpclass(nan inf) double @monte_exp(
+; CHECK-SAME: i32 noundef [[NBLOCKS:%.*]], i32 noundef [[RAND_BLOCK_LENGTH:%.*]], ptr noundef [[SAMPLES:%.*]], double noundef nofpclass(nan inf) [[Y:%.*]], double noundef nofpclass(nan inf) [[Z:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[CMP16:%.*]] = icmp sgt i32 [[NBLOCKS]], 0
+; CHECK-NEXT:    br i1 [[CMP16]], label %[[FOR_BODY_LR_PH:.*]], label %[[FOR_END10:.*]]
+; CHECK:       [[FOR_BODY_LR_PH]]:
+; CHECK-NEXT:    [[CMP211:%.*]] = icmp sgt i32 [[RAND_BLOCK_LENGTH]], 0
+; CHECK-NEXT:    br i1 [[CMP211]], label %[[FOR_BODY_US_PREHEADER:.*]], label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY_US_PREHEADER]]:
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[RAND_BLOCK_LENGTH]] to i64
+; CHECK-NEXT:    br label %[[FOR_BODY_US:.*]]
+; CHECK:       [[FOR_BODY_US]]:
+; CHECK-NEXT:    [[V1_019_US:%.*]] = phi double [ [[V1_2_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US:.*]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ]
+; CHECK-NEXT:    [[V0_018_US:%.*]] = phi double [ [[V0_2_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY_US_PREHEADER]] ]
+; CHECK-NEXT:    [[BLOCK_017_US:%.*]] = phi i32 [ [[INC9_US:%.*]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0, %[[FOR_BODY_US_PREHEADER]] ]
+; CHECK-NEXT:    tail call void @resample(i32 noundef [[RAND_BLOCK_LENGTH]], ptr noundef [[SAMPLES]])
+; CHECK-NEXT:    br label %[[FOR_BODY3_US:.*]]
+; CHECK:       [[FOR_BODY3_US]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_US]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY3_US]] ]
+; CHECK-NEXT:    [[V1_114_US:%.*]] = phi double [ [[V1_019_US]], %[[FOR_BODY_US]] ], [ [[V1_2_US]], %[[FOR_BODY3_US]] ]
+; CHECK-NEXT:    [[V0_113_US:%.*]] = phi double [ [[V0_018_US]], %[[FOR_BODY_US]] ], [ [[V0_2_US]], %[[FOR_BODY3_US]] ]
+; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds float, ptr [[SAMPLES]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_US]], align 4
+; CHECK-NEXT:    [[CONV_US:%.*]] = fpext float [[TMP0]] to double
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast double @llvm.exp2.f64(double [[CONV_US]])
+; CHECK-NEXT:    [[MUL_US:%.*]] = fmul fast double [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[SUB_US:%.*]] = fsub fast double [[MUL_US]], [[Z]]
+; CHECK-NEXT:    [[CMP4_US:%.*]] = fcmp fast ogt double [[SUB_US]], 0.000000e+00
+; CHECK-NEXT:    [[ADD_US:%.*]] = fadd fast double [[SUB_US]], [[V0_113_US]]
+; CHECK-NEXT:    [[MUL6_US:%.*]] = fmul fast double [[SUB_US]], [[SUB_US]]
+; CHECK-NEXT:    [[ADD7_US:%.*]] = fadd fast double [[MUL6_US]], [[V1_114_US]]
+; CHECK-NEXT:    [[V0_2_US]] = select i1 [[CMP4_US]], double [[ADD_US]], double [[V0_113_US]]
+; CHECK-NEXT:    [[V1_2_US]] = select i1 [[CMP4_US]], double [[ADD7_US]], double [[V1_114_US]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND25_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT:    br i1 [[EXITCOND25_NOT]], label %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]], label %[[FOR_BODY3_US]]
+; CHECK:       [[FOR_COND1_FOR_INC8_CRIT_EDGE_US]]:
+; CHECK-NEXT:    [[INC9_US]] = add nuw nsw i32 [[BLOCK_017_US]], 1
+; CHECK-NEXT:    [[EXITCOND26_NOT:%.*]] = icmp eq i32 [[INC9_US]], [[NBLOCKS]]
+; CHECK-NEXT:    br i1 [[EXITCOND26_NOT]], label %[[FOR_END10]], label %[[FOR_BODY_US]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[BLOCK_017:%.*]] = phi i32 [ [[INC9:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_LR_PH]] ]
+; CHECK-NEXT:    tail call void @resample(i32 noundef [[RAND_BLOCK_LENGTH]], ptr noundef [[SAMPLES]])
+; CHECK-NEXT:    [[INC9]] = add nuw nsw i32 [[BLOCK_017]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC9]], [[NBLOCKS]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_END10]], label %[[FOR_BODY]]
+; CHECK:       [[FOR_END10]]:
+; CHECK-NEXT:    [[V0_0_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[V0_2_US]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[V1_0_LCSSA:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[V1_2_US]], %[[FOR_COND1_FOR_INC8_CRIT_EDGE_US]] ], [ 0.000000e+00, %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[ADD11:%.*]] = fadd fast double [[V1_0_LCSSA]], [[V0_0_LCSSA]]
+; CHECK-NEXT:    ret double [[ADD11]]
+;
+entry: ; unoptimized input: every parameter and local is given its own alloca
+  %nblocks.addr = alloca i32, align 4
+  %RAND_BLOCK_LENGTH.addr = alloca i32, align 4
+  %samples.addr = alloca ptr, align 8
+  %Y.addr = alloca double, align 8
+  %Z.addr = alloca double, align 8
+  %i = alloca i32, align 4
+  %block = alloca i32, align 4
+  %rngVal = alloca double, align 8
+  %callValue = alloca double, align 8
+  %v0 = alloca double, align 8
+  %v1 = alloca double, align 8
+  store i32 %nblocks, ptr %nblocks.addr, align 4
+  store i32 %RAND_BLOCK_LENGTH, ptr %RAND_BLOCK_LENGTH.addr, align 4
+  store ptr %samples, ptr %samples.addr, align 8
+  store double %Y, ptr %Y.addr, align 8
+  store double %Z, ptr %Z.addr, align 8
+  call void @llvm.lifetime.start.p0(i64 4, ptr %i) #4
+  call void @llvm.lifetime.start.p0(i64 4, ptr %block) #4
+  call void @llvm.lifetime.start.p0(i64 8, ptr %rngVal) #4
+  call void @llvm.lifetime.start.p0(i64 8, ptr %callValue) #4
+  call void @llvm.lifetime.start.p0(i64 8, ptr %v0) #4
+  store double 0.000000e+00, ptr %v0, align 8 ; v0 is zeroed once, before the outer loop
+  call void @llvm.lifetime.start.p0(i64 8, ptr %v1) #4
+  store double 0.000000e+00, ptr %v1, align 8 ; v1 is zeroed once, before the outer loop
+  store i32 0, ptr %block, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc8, %entry
+  %0 = load i32, ptr %block, align 4
+  %1 = load i32, ptr %nblocks.addr, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end10 ; outer loop runs while block < nblocks
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4
+  %3 = load ptr, ptr %samples.addr, align 8
+  call void @resample(i32 noundef %2, ptr noundef %3) ; external call made once per outer iteration, before the inner loop
+  store i32 0, ptr %i, align 4
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %4 = load i32, ptr %i, align 4
+  %5 = load i32, ptr %RAND_BLOCK_LENGTH.addr, align 4
+  %cmp2 = icmp slt i32 %4, %5
+  br i1 %cmp2, label %for.body3, label %for.end ; inner loop runs while i < RAND_BLOCK_LENGTH
+
+for.body3:                                        ; preds = %for.cond1
+  %6 = load ptr, ptr %samples.addr, align 8
+  %7 = load i32, ptr %i, align 4
+  %idxprom = sext i32 %7 to i64
+  %arrayidx = getelementptr inbounds float, ptr %6, i64 %idxprom
+  %8 = load float, ptr %arrayidx, align 4
+  %conv = fpext float %8 to double
+  store double %conv, ptr %rngVal, align 8
+  %9 = load double, ptr %Y.addr, align 8
+  %10 = load double, ptr %rngVal, align 8
+  %11 = call fast double @llvm.exp2.f64(double %10)
+  %mul = fmul fast double %9, %11
+  %12 = load double, ptr %Z.addr, align 8
+  %sub = fsub fast double %mul, %12
+  store double %sub, ptr %callValue, align 8 ; callValue = Y * exp2((double)samples[i]) - Z
+  %13 = load double, ptr %callValue, align 8
+  %cmp4 = fcmp fast ogt double %13, 0.000000e+00
+  br i1 %cmp4, label %if.then, label %if.end ; both accumulators are updated only when callValue > 0
+
+if.then:                                          ; preds = %for.body3
+  %14 = load double, ptr %callValue, align 8
+  %15 = load double, ptr %v0, align 8
+  %add = fadd fast double %15, %14
+  store double %add, ptr %v0, align 8 ; v0 += callValue
+  %16 = load double, ptr %callValue, align 8
+  %17 = load double, ptr %callValue, align 8
+  %mul6 = fmul fast double %16, %17
+  %18 = load double, ptr %v1, align 8
+  %add7 = fadd fast double %18, %mul6
+  store double %add7, ptr %v1, align 8 ; v1 += callValue * callValue
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body3
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %19 = load i32, ptr %i, align 4
+  %inc = add nsw i32 %19, 1
+  store i32 %inc, ptr %i, align 4
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc8
+
+for.inc8:                                         ; preds = %for.end
+  %20 = load i32, ptr %block, align 4
+  %inc9 = add nsw i32 %20, 1
+  store i32 %inc9, ptr %block, align 4
+  br label %for.cond
+
+for.end10:                                        ; preds = %for.cond
+  %21 = load double, ptr %v0, align 8
+  %22 = load double, ptr %v1, align 8
+  %add11 = fadd fast double %21, %22 ; result is v0 + v1
+  call void @llvm.lifetime.end.p0(i64 8, ptr %v1) #4
+  call void @llvm.lifetime.end.p0(i64 8, ptr %v0) #4
+  call void @llvm.lifetime.end.p0(i64 8, ptr %callValue) #4
+  call void @llvm.lifetime.end.p0(i64 8, ptr %rngVal) #4
+  call void @llvm.lifetime.end.p0(i64 4, ptr %block) #4
+  call void @llvm.lifetime.end.p0(i64 4, ptr %i) #4
+  ret double %add11
+}
+
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) ; called on the locals of both functions above
+declare void @resample(i32 noundef, ptr noundef) ; no body in this file; called from the outer loop of @monte_exp
+declare double @llvm.exp2.f64(double) ; called from the inner loop of @monte_exp
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) ; called before the return of both functions above
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll
new file mode 100644
index 000000000000000..4a024cc4c0309c1
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64    -O3                   -S < %s  | FileCheck %s --check-prefixes=SSE
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3                   -S < %s  | FileCheck %s --check-prefixes=SSE
+; RUN: opt -mtriple=x86_64-- -mcpu=btver2    -O3                   -S < %s  | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3                   -S < %s  | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64    -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=SSE
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=SSE
+; RUN: opt -mtriple=x86_64-- -mcpu=btver2    -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=AVX,AVX2
+
+define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
+; SSE-LABEL: @PR50392(
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
+; SSE-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
+; SSE-NEXT:    [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2
+; SSE-NEXT:    [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3
+; SSE-NEXT:    [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]]
+; SSE-NEXT:    [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP4]], double [[ADD12]], i64 3
+; SSE-NEXT:    ret <4 x double> [[SHUFFLE]]
+;
+; AVX1-LABEL: @PR50392(
+; AVX1-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
+; AVX1-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
+; AVX1-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; AVX1-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
+; AVX1-NEXT:    [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2
+; AVX1-NEXT:    [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3
+; AVX1-NEXT:    [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]]
+; AVX1-NEXT:    [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP4]], double [[ADD12]], i64 3
+; AVX1-NEXT:    ret <4 x double> [[SHUFFLE]]
+;
+; AVX2-LABEL: @PR50392(
+; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
+; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
+; AVX2-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; AVX2-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
+; AVX2-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
+; AVX2-NEXT:    [[TMP5:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
+; AVX2-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP5]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>
+; AVX2-NEXT:    ret <4 x double> [[SHUFFLE]]
+;
+  %vecext = extractelement <4 x double> %a, i32 0
+  %vecext1 = extractelement <4 x double> %a, i32 1
+  %add = fadd double %vecext, %vecext1 ; a[0] + a[1]
+  %vecinit = insertelement <4 x double> poison, double %add, i32 0 ; goes into lane 0
+  %vecext2 = extractelement <4 x double> %a, i32 2
+  %vecext3 = extractelement <4 x double> %a, i32 3
+  %add4 = fadd double %vecext2, %vecext3 ; a[2] + a[3]
+  %vecinit5 = insertelement <4 x double> %vecinit, double %add4, i32 1 ; goes into lane 1
+  %vecext6 = extractelement <4 x double> %b, i32 0
+  %vecext7 = extractelement <4 x double> %b, i32 1
+  %add8 = fadd double %vecext6, %vecext7 ; b[0] + b[1]
+  %vecinit9 = insertelement <4 x double> %vecinit5, double %add8, i32 2 ; goes into lane 2
+  %vecext10 = extractelement <4 x double> %b, i32 2
+  %vecext11 = extractelement <4 x double> %b, i32 3
+  %add12 = fadd double %vecext10, %vecext11 ; b[2] + b[3]
+  %vecinit13 = insertelement <4 x double> %vecinit9, double %add12, i32 3 ; goes into lane 3
+  %shuffle = shufflevector <4 x double> %vecinit13, <4 x double> %a, <4 x i32> <i32 0, i32 poison, i32 2, i32 3> ; keeps lanes 0, 2 and 3 of %vecinit13; lane 1 is poison and no lane comes from %a
+  ret <4 x double> %shuffle
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX: {{.*}}
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll
new file mode 100644
index 000000000000000..1d4cee45b668565
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64    -O3                   -S < %s  | FileCheck %s --check-prefixes=SSE
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3                   -S < %s  | FileCheck %s --check-prefixes=SSE
+; RUN: opt -mtriple=x86_64-- -mcpu=btver2    -O3                   -S < %s  | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3                   -S < %s  | FileCheck %s --check-prefixes=AVX,AVX2
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64    -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=SSE
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=SSE
+; RUN: opt -mtriple=x86_64-- -mcpu=btver2    -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default<O3>" -S < %s  | FileCheck %s --check-prefixes=AVX,AVX2
+
+define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) {
+; SSE-LABEL: @PR94546(
+; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 6>
+; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 7>
+; SSE-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
+; SSE-NEXT:    ret <4 x double> [[TMP4]]
+;
+; AVX-LABEL: @PR94546(
+; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 6>
+; AVX-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 7>
+; AVX-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
+; AVX-NEXT:    [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
+; AVX-NEXT:    ret <4 x double> [[TMP4]]
+;
+  %vecext = extractelement <4 x double> %a, i32 0
+  %vecext1 = extractelement <4 x double> %a, i32 1
+  %add = fadd double %vecext, %vecext1 ; a[0] + a[1]
+  %vecinit = insertelement <4 x double> poison, double %add, i32 0 ; goes into lane 0
+  %vecext2 = extractelement <4 x double> %a, i32 2
+  %vecext3 = extractelement <4 x double> %a, i32 3
+  %add4 = fadd double %vecext2, %vecext3 ; a[2] + a[3]
+  %vecinit5 = insertelement <4 x double> %vecinit, double %add4, i32 1 ; goes into lane 1
+  %vecext6 = extractelement <4 x double> %b, i32 0
+  %vecext7 = extractelement <4 x double> %b, i32 1
+  %add8 = fadd double %vecext6, %vecext7 ; b[0] + b[1]
+  %vecinit9 = insertelement <4 x double> %vecinit5, double %add8, i32 2 ; goes into lane 2
+  %vecext10 = extractelement <4 x double> %b, i32 2
+  %vecext11 = extractelement <4 x double> %b, i32 3
+  %add12 = fadd double %vecext10, %vecext11 ; b[2] + b[3]
+  %vecinit13 = insertelement <4 x double> %vecinit9, double %add12, i32 3 ; goes into lane 3
+  %shuffle = shufflevector <4 x double> %vecinit13, <4 x double> %a, <4 x i32> <i32 0, i32 poison, i32 poison, i32 3> ; keeps only lanes 0 and 3 of %vecinit13; lanes 1 and 2 are poison and no lane comes from %a
+  ret <4 x double> %shuffle
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX1: {{.*}}
+; AVX2: {{.*}}
diff --git a/llvm/test/Transforms/SCCP/strictfp-phis-fcmp.ll b/llvm/test/Transforms/SCCP/strictfp-phis-fcmp.ll
index 3bf7d9578b560a2..a6c023a25608b42 100644
--- a/llvm/test/Transforms/SCCP/strictfp-phis-fcmp.ll
+++ b/llvm/test/Transforms/SCCP/strictfp-phis-fcmp.ll
@@ -19,7 +19,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 1.0, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0
+  %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0
 
   ret i1 %c
 }
@@ -42,7 +42,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 1.0, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0
+  %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0
 
   ret i1 %c
 }
@@ -66,7 +66,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 1.0, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0
+  %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0
 
   ret i1 %c
 }
@@ -91,7 +91,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 2.0, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0
+  %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0
   ret i1 %c
 }
 
@@ -115,7 +115,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 2.0, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0
+  %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0
   ret i1 %c
 }
 
@@ -139,7 +139,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 2.0, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0
+  %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0
   ret i1 %c
 }
 
@@ -163,7 +163,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ %f, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0
+  %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0
   ret i1 %c
 }
 
@@ -187,7 +187,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ %f, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0
+  %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0
   ret i1 %c
 }
 
@@ -211,7 +211,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ %f, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0
+  %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0
   ret i1 %c
 }
 
@@ -236,7 +236,7 @@ dead:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 1.0, %if.true], [ %f, %dead ]
-  %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.ignore") #0
+  %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.ignore") #0
   ret i1 %c
 }
 
@@ -261,7 +261,7 @@ dead:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 1.0, %if.true], [ %f, %dead ]
-  %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.maytrap") #0
+  %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.maytrap") #0
   ret i1 %c
 }
 
@@ -288,11 +288,11 @@ dead:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 1.0, %if.true], [ %f, %dead ]
-  %c = call i1 @llvm.experimental.constrained.fcmp.i1.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.strict") #0
+  %c = call i1 @llvm.experimental.constrained.fcmp.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.strict") #0
   ret i1 %c
 }
 
 attributes #0 = { strictfp }
 
-declare i1 @llvm.experimental.constrained.fcmp.i1.f32(float, float, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
 
diff --git a/llvm/test/Transforms/SCCP/strictfp-phis-fcmps.ll b/llvm/test/Transforms/SCCP/strictfp-phis-fcmps.ll
index 6db1f47ccca9977..213293a78593851 100644
--- a/llvm/test/Transforms/SCCP/strictfp-phis-fcmps.ll
+++ b/llvm/test/Transforms/SCCP/strictfp-phis-fcmps.ll
@@ -19,7 +19,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 1.0, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0
+  %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0
 
   ret i1 %c
 }
@@ -42,7 +42,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 1.0, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0
+  %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0
 
   ret i1 %c
 }
@@ -66,7 +66,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 1.0, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0
+  %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0
 
   ret i1 %c
 }
@@ -91,7 +91,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 2.0, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0
+  %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0
   ret i1 %c
 }
 
@@ -115,7 +115,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 2.0, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0
+  %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0
   ret i1 %c
 }
 
@@ -139,7 +139,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 2.0, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0
+  %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0
   ret i1 %c
 }
 
@@ -163,7 +163,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ %f, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0
+  %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.ignore") #0
   ret i1 %c
 }
 
@@ -187,7 +187,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ %f, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0
+  %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.maytrap") #0
   ret i1 %c
 }
 
@@ -211,7 +211,7 @@ if.true:
 
 end:
   %p = phi float [ 1.0, %entry ], [ %f, %if.true]
-  %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0
+  %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"ueq", metadata !"fpexcept.strict") #0
   ret i1 %c
 }
 
@@ -236,7 +236,7 @@ dead:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 1.0, %if.true], [ %f, %dead ]
-  %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.ignore") #0
+  %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.ignore") #0
   ret i1 %c
 }
 
@@ -261,7 +261,7 @@ dead:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 1.0, %if.true], [ %f, %dead ]
-  %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.maytrap") #0
+  %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.maytrap") #0
   ret i1 %c
 }
 
@@ -288,11 +288,11 @@ dead:
 
 end:
   %p = phi float [ 1.0, %entry ], [ 1.0, %if.true], [ %f, %dead ]
-  %c = call i1 @llvm.experimental.constrained.fcmps.i1.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.strict") #0
+  %c = call i1 @llvm.experimental.constrained.fcmps.f32(float %p, float 1.0, metadata !"une", metadata !"fpexcept.strict") #0
   ret i1 %c
 }
 
 attributes #0 = { strictfp }
 
-declare i1 @llvm.experimental.constrained.fcmps.i1.f32(float, float, metadata, metadata)
+declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata)
 
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
new file mode 100644
index 000000000000000..e972955e26cb475
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/div.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=aarch64 -passes=slp-vectorizer -mattr=-sve -S < %s | FileCheck %s --check-prefixes=CHECK,NO-SVE
+; RUN: opt -mtriple=aarch64 -passes=slp-vectorizer -mattr=+sve -S < %s | FileCheck %s --check-prefixes=CHECK,SVE
+
+define <2 x i8> @slp_v2i8_Op1_Op2_unknown(<2 x i8> %a, <2 x i8> %b)
+; NO-SVE-LABEL: define <2 x i8> @slp_v2i8_Op1_Op2_unknown(
+; NO-SVE-SAME: <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <2 x i8> [[A]], i32 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <2 x i8> [[A]], i32 1
+; NO-SVE-NEXT:    [[B0:%.*]] = extractelement <2 x i8> [[B]], i32 0
+; NO-SVE-NEXT:    [[B1:%.*]] = extractelement <2 x i8> [[B]], i32 1
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i8 [[A0]], [[B0]]
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i8 [[A1]], [[B1]]
+; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i32 0
+; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <2 x i8> [[R0]], i8 [[TMP2]], i32 1
+; NO-SVE-NEXT:    ret <2 x i8> [[R1]]
+;
+; SVE-LABEL: define <2 x i8> @slp_v2i8_Op1_Op2_unknown(
+; SVE-SAME: <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; SVE-NEXT:    [[TMP1:%.*]] = sdiv <2 x i8> [[A]], [[B]]
+; SVE-NEXT:    ret <2 x i8> [[TMP1]]
+;
+{
+  %a0 = extractelement <2 x i8> %a, i32 0 ; scalarized 2 x i8 sdiv; the assertions keep it scalar with -sve and expect one vector sdiv with +sve
+  %a1 = extractelement <2 x i8> %a, i32 1
+  %b0 = extractelement <2 x i8> %b, i32 0 ; divisor lanes come from an argument, so neither operand is a constant
+  %b1 = extractelement <2 x i8> %b, i32 1
+  %1 = sdiv i8 %a0, %b0 ; one signed division per lane
+  %2 = sdiv i8 %a1, %b1
+  %r0 = insertelement <2 x i8> poison, i8 %1, i32 0 ; rebuild the result vector lane by lane
+  %r1 = insertelement <2 x i8> %r0, i8 %2, i32 1
+  ret <2 x i8> %r1
+}
+
+define <2 x i16> @slp_v2i16_Op1_Op2_unknown(<2 x i16> %a, <2 x i16> %b)
+; NO-SVE-LABEL: define <2 x i16> @slp_v2i16_Op1_Op2_unknown(
+; NO-SVE-SAME: <2 x i16> [[A:%.*]], <2 x i16> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <2 x i16> [[A]], i32 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <2 x i16> [[A]], i32 1
+; NO-SVE-NEXT:    [[B0:%.*]] = extractelement <2 x i16> [[B]], i32 0
+; NO-SVE-NEXT:    [[B1:%.*]] = extractelement <2 x i16> [[B]], i32 1
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i16 [[A0]], [[B0]]
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i16 [[A1]], [[B1]]
+; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i32 0
+; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <2 x i16> [[R0]], i16 [[TMP2]], i32 1
+; NO-SVE-NEXT:    ret <2 x i16> [[R1]]
+;
+; SVE-LABEL: define <2 x i16> @slp_v2i16_Op1_Op2_unknown(
+; SVE-SAME: <2 x i16> [[A:%.*]], <2 x i16> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP1:%.*]] = sdiv <2 x i16> [[A]], [[B]]
+; SVE-NEXT:    ret <2 x i16> [[TMP1]]
+;
+{
+  %a0 = extractelement <2 x i16> %a, i32 0 ; scalarized 2 x i16 sdiv; the assertions keep it scalar with -sve and expect one vector sdiv with +sve
+  %a1 = extractelement <2 x i16> %a, i32 1
+  %b0 = extractelement <2 x i16> %b, i32 0 ; divisor lanes come from an argument, so neither operand is a constant
+  %b1 = extractelement <2 x i16> %b, i32 1
+  %1 = sdiv i16 %a0, %b0 ; one signed division per lane
+  %2 = sdiv i16 %a1, %b1
+  %r0 = insertelement <2 x i16> poison, i16 %1, i32 0 ; rebuild the result vector lane by lane
+  %r1 = insertelement <2 x i16> %r0, i16 %2, i32 1
+  ret <2 x i16> %r1
+}
+
+define <2 x i32> @slp_v2i32_Op1_Op2_unknown(<2 x i32> %a, <2 x i32> %b)
+; NO-SVE-LABEL: define <2 x i32> @slp_v2i32_Op1_Op2_unknown(
+; NO-SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <2 x i32> [[A]], i32 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <2 x i32> [[A]], i32 1
+; NO-SVE-NEXT:    [[B0:%.*]] = extractelement <2 x i32> [[B]], i32 0
+; NO-SVE-NEXT:    [[B1:%.*]] = extractelement <2 x i32> [[B]], i32 1
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i32 [[A0]], [[B0]]
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i32 [[A1]], [[B1]]
+; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i32 0
+; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <2 x i32> [[R0]], i32 [[TMP2]], i32 1
+; NO-SVE-NEXT:    ret <2 x i32> [[R1]]
+;
+; SVE-LABEL: define <2 x i32> @slp_v2i32_Op1_Op2_unknown(
+; SVE-SAME: <2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP1:%.*]] = sdiv <2 x i32> [[A]], [[B]]
+; SVE-NEXT:    ret <2 x i32> [[TMP1]]
+;
+{
+  %a0 = extractelement <2 x i32> %a, i32 0 ; scalarized 2 x i32 sdiv; the assertions keep it scalar with -sve and expect one vector sdiv with +sve
+  %a1 = extractelement <2 x i32> %a, i32 1
+  %b0 = extractelement <2 x i32> %b, i32 0 ; divisor lanes come from an argument, so neither operand is a constant
+  %b1 = extractelement <2 x i32> %b, i32 1
+  %1 = sdiv i32 %a0, %b0 ; one signed division per lane
+  %2 = sdiv i32 %a1, %b1
+  %r0 = insertelement <2 x i32> poison, i32 %1, i32 0 ; rebuild the result vector lane by lane
+  %r1 = insertelement <2 x i32> %r0, i32 %2, i32 1
+  ret <2 x i32> %r1
+}
+
+define <2 x i64> @slp_v2i64_Op1_Op2_unknown(<2 x i64> %a, <2 x i64> %b)
+; NO-SVE-LABEL: define <2 x i64> @slp_v2i64_Op1_Op2_unknown(
+; NO-SVE-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <2 x i64> [[A]], i32 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <2 x i64> [[A]], i32 1
+; NO-SVE-NEXT:    [[B0:%.*]] = extractelement <2 x i64> [[B]], i32 0
+; NO-SVE-NEXT:    [[B1:%.*]] = extractelement <2 x i64> [[B]], i32 1
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i64 [[A0]], [[B0]]
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i64 [[A1]], [[B1]]
+; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i32 0
+; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <2 x i64> [[R0]], i64 [[TMP2]], i32 1
+; NO-SVE-NEXT:    ret <2 x i64> [[R1]]
+;
+; SVE-LABEL: define <2 x i64> @slp_v2i64_Op1_Op2_unknown(
+; SVE-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP1:%.*]] = sdiv <2 x i64> [[A]], [[B]]
+; SVE-NEXT:    ret <2 x i64> [[TMP1]]
+;
+{
+  %a0 = extractelement <2 x i64> %a, i32 0 ; scalarized 2 x i64 sdiv; the assertions keep it scalar with -sve and expect one vector sdiv with +sve
+  %a1 = extractelement <2 x i64> %a, i32 1
+  %b0 = extractelement <2 x i64> %b, i32 0 ; divisor lanes come from an argument, so neither operand is a constant
+  %b1 = extractelement <2 x i64> %b, i32 1
+  %1 = sdiv i64 %a0, %b0 ; one signed division per lane
+  %2 = sdiv i64 %a1, %b1
+  %r0 = insertelement <2 x i64> poison, i64 %1, i32 0 ; rebuild the result vector lane by lane
+  %r1 = insertelement <2 x i64> %r0, i64 %2, i32 1
+  ret <2 x i64> %r1
+}
+
+define <4 x i8> @slp_v4i8_Op1_Op2_unknown(<4 x i8> %a, <4 x i8> %b)
+; NO-SVE-LABEL: define <4 x i8> @slp_v4i8_Op1_Op2_unknown(
+; NO-SVE-SAME: <4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <4 x i8> [[A]], i32 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <4 x i8> [[A]], i32 1
+; NO-SVE-NEXT:    [[A2:%.*]] = extractelement <4 x i8> [[A]], i32 2
+; NO-SVE-NEXT:    [[A3:%.*]] = extractelement <4 x i8> [[A]], i32 3
+; NO-SVE-NEXT:    [[B0:%.*]] = extractelement <4 x i8> [[B]], i32 0
+; NO-SVE-NEXT:    [[B1:%.*]] = extractelement <4 x i8> [[B]], i32 1
+; NO-SVE-NEXT:    [[B2:%.*]] = extractelement <4 x i8> [[B]], i32 2
+; NO-SVE-NEXT:    [[B3:%.*]] = extractelement <4 x i8> [[B]], i32 3
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i8 [[A0]], [[B0]]
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i8 [[A1]], [[B1]]
+; NO-SVE-NEXT:    [[TMP3:%.*]] = sdiv i8 [[A2]], [[B2]]
+; NO-SVE-NEXT:    [[TMP4:%.*]] = sdiv i8 [[A3]], [[B3]]
+; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i32 0
+; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <4 x i8> [[R0]], i8 [[TMP2]], i32 1
+; NO-SVE-NEXT:    [[R2:%.*]] = insertelement <4 x i8> [[R1]], i8 [[TMP3]], i32 2
+; NO-SVE-NEXT:    [[R3:%.*]] = insertelement <4 x i8> [[R2]], i8 [[TMP4]], i32 3
+; NO-SVE-NEXT:    ret <4 x i8> [[R3]]
+;
+; SVE-LABEL: define <4 x i8> @slp_v4i8_Op1_Op2_unknown(
+; SVE-SAME: <4 x i8> [[A:%.*]], <4 x i8> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP1:%.*]] = sdiv <4 x i8> [[A]], [[B]]
+; SVE-NEXT:    ret <4 x i8> [[TMP1]]
+;
+{
+  %a0 = extractelement <4 x i8> %a, i32 0 ; scalarized 4 x i8 sdiv; the assertions keep it scalar with -sve and expect one vector sdiv with +sve
+  %a1 = extractelement <4 x i8> %a, i32 1
+  %a2 = extractelement <4 x i8> %a, i32 2
+  %a3 = extractelement <4 x i8> %a, i32 3
+  %b0 = extractelement <4 x i8> %b, i32 0 ; divisor lanes come from an argument, so neither operand is a constant
+  %b1 = extractelement <4 x i8> %b, i32 1
+  %b2 = extractelement <4 x i8> %b, i32 2
+  %b3 = extractelement <4 x i8> %b, i32 3
+  %1 = sdiv i8 %a0, %b0 ; one signed division per lane
+  %2 = sdiv i8 %a1, %b1
+  %3 = sdiv i8 %a2, %b2
+  %4 = sdiv i8 %a3, %b3
+  %r0 = insertelement <4 x i8> poison, i8 %1, i32 0 ; rebuild the result vector lane by lane
+  %r1 = insertelement <4 x i8> %r0, i8 %2, i32 1
+  %r2 = insertelement <4 x i8> %r1, i8 %3, i32 2
+  %r3 = insertelement <4 x i8> %r2, i8 %4, i32 3
+  ret <4 x i8> %r3
+}
+
+define <4 x i16> @slp_v4i16_Op1_Op2_unknown(<4 x i16> %a, <4 x i16> %b)
+; NO-SVE-LABEL: define <4 x i16> @slp_v4i16_Op1_Op2_unknown(
+; NO-SVE-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <4 x i16> [[A]], i32 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <4 x i16> [[A]], i32 1
+; NO-SVE-NEXT:    [[A2:%.*]] = extractelement <4 x i16> [[A]], i32 2
+; NO-SVE-NEXT:    [[A3:%.*]] = extractelement <4 x i16> [[A]], i32 3
+; NO-SVE-NEXT:    [[B0:%.*]] = extractelement <4 x i16> [[B]], i32 0
+; NO-SVE-NEXT:    [[B1:%.*]] = extractelement <4 x i16> [[B]], i32 1
+; NO-SVE-NEXT:    [[B2:%.*]] = extractelement <4 x i16> [[B]], i32 2
+; NO-SVE-NEXT:    [[B3:%.*]] = extractelement <4 x i16> [[B]], i32 3
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i16 [[A0]], [[B0]]
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i16 [[A1]], [[B1]]
+; NO-SVE-NEXT:    [[TMP3:%.*]] = sdiv i16 [[A2]], [[B2]]
+; NO-SVE-NEXT:    [[TMP4:%.*]] = sdiv i16 [[A3]], [[B3]]
+; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i32 0
+; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <4 x i16> [[R0]], i16 [[TMP2]], i32 1
+; NO-SVE-NEXT:    [[R2:%.*]] = insertelement <4 x i16> [[R1]], i16 [[TMP3]], i32 2
+; NO-SVE-NEXT:    [[R3:%.*]] = insertelement <4 x i16> [[R2]], i16 [[TMP4]], i32 3
+; NO-SVE-NEXT:    ret <4 x i16> [[R3]]
+;
+; SVE-LABEL: define <4 x i16> @slp_v4i16_Op1_Op2_unknown(
+; SVE-SAME: <4 x i16> [[A:%.*]], <4 x i16> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP1:%.*]] = sdiv <4 x i16> [[A]], [[B]]
+; SVE-NEXT:    ret <4 x i16> [[TMP1]]
+;
+{
+  %a0 = extractelement <4 x i16> %a, i32 0 ; scalarized 4 x i16 sdiv; the assertions keep it scalar with -sve and expect one vector sdiv with +sve
+  %a1 = extractelement <4 x i16> %a, i32 1
+  %a2 = extractelement <4 x i16> %a, i32 2
+  %a3 = extractelement <4 x i16> %a, i32 3
+  %b0 = extractelement <4 x i16> %b, i32 0 ; divisor lanes come from an argument, so neither operand is a constant
+  %b1 = extractelement <4 x i16> %b, i32 1
+  %b2 = extractelement <4 x i16> %b, i32 2
+  %b3 = extractelement <4 x i16> %b, i32 3
+  %1 = sdiv i16 %a0, %b0 ; one signed division per lane
+  %2 = sdiv i16 %a1, %b1
+  %3 = sdiv i16 %a2, %b2
+  %4 = sdiv i16 %a3, %b3
+  %r0 = insertelement <4 x i16> poison, i16 %1, i32 0 ; rebuild the result vector lane by lane
+  %r1 = insertelement <4 x i16> %r0, i16 %2, i32 1
+  %r2 = insertelement <4 x i16> %r1, i16 %3, i32 2
+  %r3 = insertelement <4 x i16> %r2, i16 %4, i32 3
+  ret <4 x i16> %r3
+}
+
+define <4 x i32> @slp_v4i32_Op1_Op2_unknown(<4 x i32> %a, <4 x i32> %b)
+; NO-SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_Op2_unknown(
+; NO-SVE-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <4 x i32> [[A]], i32 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
+; NO-SVE-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
+; NO-SVE-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
+; NO-SVE-NEXT:    [[B0:%.*]] = extractelement <4 x i32> [[B]], i32 0
+; NO-SVE-NEXT:    [[B1:%.*]] = extractelement <4 x i32> [[B]], i32 1
+; NO-SVE-NEXT:    [[B2:%.*]] = extractelement <4 x i32> [[B]], i32 2
+; NO-SVE-NEXT:    [[B3:%.*]] = extractelement <4 x i32> [[B]], i32 3
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i32 [[A0]], [[B0]]
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i32 [[A1]], [[B1]]
+; NO-SVE-NEXT:    [[TMP3:%.*]] = sdiv i32 [[A2]], [[B2]]
+; NO-SVE-NEXT:    [[TMP4:%.*]] = sdiv i32 [[A3]], [[B3]]
+; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
+; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP2]], i32 1
+; NO-SVE-NEXT:    [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[TMP3]], i32 2
+; NO-SVE-NEXT:    [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[TMP4]], i32 3
+; NO-SVE-NEXT:    ret <4 x i32> [[R3]]
+;
+; SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_Op2_unknown(
+; SVE-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> [[A]], [[B]]
+; SVE-NEXT:    ret <4 x i32> [[TMP1]]
+;
+{
+  %a0 = extractelement <4 x i32> %a, i32 0 ; scalarized 4 x i32 sdiv; the assertions keep it scalar with -sve and expect one vector sdiv with +sve
+  %a1 = extractelement <4 x i32> %a, i32 1
+  %a2 = extractelement <4 x i32> %a, i32 2
+  %a3 = extractelement <4 x i32> %a, i32 3
+  %b0 = extractelement <4 x i32> %b, i32 0 ; divisor lanes come from an argument, so neither operand is a constant
+  %b1 = extractelement <4 x i32> %b, i32 1
+  %b2 = extractelement <4 x i32> %b, i32 2
+  %b3 = extractelement <4 x i32> %b, i32 3
+  %1 = sdiv i32 %a0, %b0 ; one signed division per lane
+  %2 = sdiv i32 %a1, %b1
+  %3 = sdiv i32 %a2, %b2
+  %4 = sdiv i32 %a3, %b3
+  %r0 = insertelement <4 x i32> poison, i32 %1, i32 0 ; rebuild the result vector lane by lane
+  %r1 = insertelement <4 x i32> %r0, i32 %2, i32 1
+  %r2 = insertelement <4 x i32> %r1, i32 %3, i32 2
+  %r3 = insertelement <4 x i32> %r2, i32 %4, i32 3
+  ret <4 x i32> %r3
+}
+
+define <8 x i8> @slp_v8i8_Op1_Op2_unknown(<8 x i8> %a, <8 x i8> %b)
+; NO-SVE-LABEL: define <8 x i8> @slp_v8i8_Op1_Op2_unknown(
+; NO-SVE-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <8 x i8> [[A]], i32 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <8 x i8> [[A]], i32 1
+; NO-SVE-NEXT:    [[A2:%.*]] = extractelement <8 x i8> [[A]], i32 2
+; NO-SVE-NEXT:    [[A3:%.*]] = extractelement <8 x i8> [[A]], i32 3
+; NO-SVE-NEXT:    [[A4:%.*]] = extractelement <8 x i8> [[A]], i32 4
+; NO-SVE-NEXT:    [[A5:%.*]] = extractelement <8 x i8> [[A]], i32 5
+; NO-SVE-NEXT:    [[A6:%.*]] = extractelement <8 x i8> [[A]], i32 6
+; NO-SVE-NEXT:    [[A7:%.*]] = extractelement <8 x i8> [[A]], i32 7
+; NO-SVE-NEXT:    [[B0:%.*]] = extractelement <8 x i8> [[B]], i32 0
+; NO-SVE-NEXT:    [[B1:%.*]] = extractelement <8 x i8> [[B]], i32 1
+; NO-SVE-NEXT:    [[B2:%.*]] = extractelement <8 x i8> [[B]], i32 2
+; NO-SVE-NEXT:    [[B3:%.*]] = extractelement <8 x i8> [[B]], i32 3
+; NO-SVE-NEXT:    [[B4:%.*]] = extractelement <8 x i8> [[B]], i32 4
+; NO-SVE-NEXT:    [[B5:%.*]] = extractelement <8 x i8> [[B]], i32 5
+; NO-SVE-NEXT:    [[B6:%.*]] = extractelement <8 x i8> [[B]], i32 6
+; NO-SVE-NEXT:    [[B7:%.*]] = extractelement <8 x i8> [[B]], i32 7
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i8 [[A0]], [[B0]]
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i8 [[A1]], [[B1]]
+; NO-SVE-NEXT:    [[TMP3:%.*]] = sdiv i8 [[A2]], [[B2]]
+; NO-SVE-NEXT:    [[TMP4:%.*]] = sdiv i8 [[A3]], [[B3]]
+; NO-SVE-NEXT:    [[TMP5:%.*]] = sdiv i8 [[A4]], [[B4]]
+; NO-SVE-NEXT:    [[TMP6:%.*]] = sdiv i8 [[A5]], [[B5]]
+; NO-SVE-NEXT:    [[TMP7:%.*]] = sdiv i8 [[A6]], [[B6]]
+; NO-SVE-NEXT:    [[TMP8:%.*]] = sdiv i8 [[A7]], [[B7]]
+; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i32 0
+; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <8 x i8> [[R0]], i8 [[TMP2]], i32 1
+; NO-SVE-NEXT:    [[R2:%.*]] = insertelement <8 x i8> [[R1]], i8 [[TMP3]], i32 2
+; NO-SVE-NEXT:    [[R3:%.*]] = insertelement <8 x i8> [[R2]], i8 [[TMP4]], i32 3
+; NO-SVE-NEXT:    [[R4:%.*]] = insertelement <8 x i8> [[R3]], i8 [[TMP5]], i32 4
+; NO-SVE-NEXT:    [[R5:%.*]] = insertelement <8 x i8> [[R4]], i8 [[TMP6]], i32 5
+; NO-SVE-NEXT:    [[R6:%.*]] = insertelement <8 x i8> [[R5]], i8 [[TMP7]], i32 6
+; NO-SVE-NEXT:    [[R7:%.*]] = insertelement <8 x i8> [[R6]], i8 [[TMP8]], i32 7
+; NO-SVE-NEXT:    ret <8 x i8> [[R7]]
+;
+; SVE-LABEL: define <8 x i8> @slp_v8i8_Op1_Op2_unknown(
+; SVE-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SVE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SVE-NEXT:    [[TMP3:%.*]] = sdiv <4 x i8> [[TMP1]], [[TMP2]]
+; SVE-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SVE-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SVE-NEXT:    [[TMP6:%.*]] = sdiv <4 x i8> [[TMP4]], [[TMP5]]
+; SVE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; SVE-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i8> [[TMP6]], <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; SVE-NEXT:    [[R71:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SVE-NEXT:    ret <8 x i8> [[R71]]
+;
+{
+  %a0 = extractelement <8 x i8> %a, i32 0 ; scalarized 8 x i8 sdiv; the assertions keep it scalar with -sve and expect two 4-wide vector sdivs with +sve
+  %a1 = extractelement <8 x i8> %a, i32 1
+  %a2 = extractelement <8 x i8> %a, i32 2
+  %a3 = extractelement <8 x i8> %a, i32 3
+  %a4 = extractelement <8 x i8> %a, i32 4
+  %a5 = extractelement <8 x i8> %a, i32 5
+  %a6 = extractelement <8 x i8> %a, i32 6
+  %a7 = extractelement <8 x i8> %a, i32 7
+  %b0 = extractelement <8 x i8> %b, i32 0 ; divisor lanes come from an argument, so neither operand is a constant
+  %b1 = extractelement <8 x i8> %b, i32 1
+  %b2 = extractelement <8 x i8> %b, i32 2
+  %b3 = extractelement <8 x i8> %b, i32 3
+  %b4 = extractelement <8 x i8> %b, i32 4
+  %b5 = extractelement <8 x i8> %b, i32 5
+  %b6 = extractelement <8 x i8> %b, i32 6
+  %b7 = extractelement <8 x i8> %b, i32 7
+  %1 = sdiv i8 %a0, %b0 ; one signed division per lane
+  %2 = sdiv i8 %a1, %b1
+  %3 = sdiv i8 %a2, %b2
+  %4 = sdiv i8 %a3, %b3
+  %5 = sdiv i8 %a4, %b4
+  %6 = sdiv i8 %a5, %b5
+  %7 = sdiv i8 %a6, %b6
+  %8 = sdiv i8 %a7, %b7
+  %r0 = insertelement <8 x i8> poison, i8 %1, i32 0 ; rebuild the result vector lane by lane
+  %r1 = insertelement <8 x i8> %r0, i8 %2, i32 1
+  %r2 = insertelement <8 x i8> %r1, i8 %3, i32 2
+  %r3 = insertelement <8 x i8> %r2, i8 %4, i32 3
+  %r4 = insertelement <8 x i8> %r3, i8 %5, i32 4
+  %r5 = insertelement <8 x i8> %r4, i8 %6, i32 5
+  %r6 = insertelement <8 x i8> %r5, i8 %7, i32 6
+  %r7 = insertelement <8 x i8> %r6, i8 %8, i32 7
+  ret <8 x i8> %r7 ; return the fully built vector so lanes 4-7 stay live; regenerate the assertions with update_test_checks.py
+}
+
+define <8 x i16> @slp_v8i16_Op1_Op2_unknown(<8 x i16> %a, <8 x i16> %b)
+; NO-SVE-LABEL: define <8 x i16> @slp_v8i16_Op1_Op2_unknown(
+; NO-SVE-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <8 x i16> [[A]], i32 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <8 x i16> [[A]], i32 1
+; NO-SVE-NEXT:    [[A2:%.*]] = extractelement <8 x i16> [[A]], i32 2
+; NO-SVE-NEXT:    [[A3:%.*]] = extractelement <8 x i16> [[A]], i32 3
+; NO-SVE-NEXT:    [[A4:%.*]] = extractelement <8 x i16> [[A]], i32 4
+; NO-SVE-NEXT:    [[A5:%.*]] = extractelement <8 x i16> [[A]], i32 5
+; NO-SVE-NEXT:    [[A6:%.*]] = extractelement <8 x i16> [[A]], i32 6
+; NO-SVE-NEXT:    [[A7:%.*]] = extractelement <8 x i16> [[A]], i32 7
+; NO-SVE-NEXT:    [[B0:%.*]] = extractelement <8 x i16> [[B]], i32 0
+; NO-SVE-NEXT:    [[B1:%.*]] = extractelement <8 x i16> [[B]], i32 1
+; NO-SVE-NEXT:    [[B2:%.*]] = extractelement <8 x i16> [[B]], i32 2
+; NO-SVE-NEXT:    [[B3:%.*]] = extractelement <8 x i16> [[B]], i32 3
+; NO-SVE-NEXT:    [[B4:%.*]] = extractelement <8 x i16> [[B]], i32 4
+; NO-SVE-NEXT:    [[B5:%.*]] = extractelement <8 x i16> [[B]], i32 5
+; NO-SVE-NEXT:    [[B6:%.*]] = extractelement <8 x i16> [[B]], i32 6
+; NO-SVE-NEXT:    [[B7:%.*]] = extractelement <8 x i16> [[B]], i32 7
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i16 [[A0]], [[B0]]
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i16 [[A1]], [[B1]]
+; NO-SVE-NEXT:    [[TMP3:%.*]] = sdiv i16 [[A2]], [[B2]]
+; NO-SVE-NEXT:    [[TMP4:%.*]] = sdiv i16 [[A3]], [[B3]]
+; NO-SVE-NEXT:    [[TMP5:%.*]] = sdiv i16 [[A4]], [[B4]]
+; NO-SVE-NEXT:    [[TMP6:%.*]] = sdiv i16 [[A5]], [[B5]]
+; NO-SVE-NEXT:    [[TMP7:%.*]] = sdiv i16 [[A6]], [[B6]]
+; NO-SVE-NEXT:    [[TMP8:%.*]] = sdiv i16 [[A7]], [[B7]]
+; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i32 0
+; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <8 x i16> [[R0]], i16 [[TMP2]], i32 1
+; NO-SVE-NEXT:    [[R2:%.*]] = insertelement <8 x i16> [[R1]], i16 [[TMP3]], i32 2
+; NO-SVE-NEXT:    [[R3:%.*]] = insertelement <8 x i16> [[R2]], i16 [[TMP4]], i32 3
+; NO-SVE-NEXT:    [[R4:%.*]] = insertelement <8 x i16> [[R3]], i16 [[TMP5]], i32 4
+; NO-SVE-NEXT:    [[R5:%.*]] = insertelement <8 x i16> [[R4]], i16 [[TMP6]], i32 5
+; NO-SVE-NEXT:    [[R6:%.*]] = insertelement <8 x i16> [[R5]], i16 [[TMP7]], i32 6
+; NO-SVE-NEXT:    [[R7:%.*]] = insertelement <8 x i16> [[R6]], i16 [[TMP8]], i32 7
+; NO-SVE-NEXT:    ret <8 x i16> [[R7]]
+;
+; SVE-LABEL: define <8 x i16> @slp_v8i16_Op1_Op2_unknown(
+; SVE-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SVE-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SVE-NEXT:    [[TMP3:%.*]] = sdiv <4 x i16> [[TMP1]], [[TMP2]]
+; SVE-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SVE-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; SVE-NEXT:    [[TMP6:%.*]] = sdiv <4 x i16> [[TMP4]], [[TMP5]]
+; SVE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; SVE-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+; SVE-NEXT:    [[R71:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; SVE-NEXT:    ret <8 x i16> [[R71]]
+;
+{
+  %a0 = extractelement <8 x i16> %a, i32 0 ; scalarized 8 x i16 sdiv; the assertions keep it scalar with -sve and expect two 4-wide vector sdivs with +sve
+  %a1 = extractelement <8 x i16> %a, i32 1
+  %a2 = extractelement <8 x i16> %a, i32 2
+  %a3 = extractelement <8 x i16> %a, i32 3
+  %a4 = extractelement <8 x i16> %a, i32 4
+  %a5 = extractelement <8 x i16> %a, i32 5
+  %a6 = extractelement <8 x i16> %a, i32 6
+  %a7 = extractelement <8 x i16> %a, i32 7
+  %b0 = extractelement <8 x i16> %b, i32 0 ; divisor lanes come from an argument, so neither operand is a constant
+  %b1 = extractelement <8 x i16> %b, i32 1
+  %b2 = extractelement <8 x i16> %b, i32 2
+  %b3 = extractelement <8 x i16> %b, i32 3
+  %b4 = extractelement <8 x i16> %b, i32 4
+  %b5 = extractelement <8 x i16> %b, i32 5
+  %b6 = extractelement <8 x i16> %b, i32 6
+  %b7 = extractelement <8 x i16> %b, i32 7
+  %1 = sdiv i16 %a0, %b0 ; one signed division per lane
+  %2 = sdiv i16 %a1, %b1
+  %3 = sdiv i16 %a2, %b2
+  %4 = sdiv i16 %a3, %b3
+  %5 = sdiv i16 %a4, %b4
+  %6 = sdiv i16 %a5, %b5
+  %7 = sdiv i16 %a6, %b6
+  %8 = sdiv i16 %a7, %b7
+  %r0 = insertelement <8 x i16> poison, i16 %1, i32 0 ; rebuild the result vector lane by lane
+  %r1 = insertelement <8 x i16> %r0, i16 %2, i32 1
+  %r2 = insertelement <8 x i16> %r1, i16 %3, i32 2
+  %r3 = insertelement <8 x i16> %r2, i16 %4, i32 3
+  %r4 = insertelement <8 x i16> %r3, i16 %5, i32 4
+  %r5 = insertelement <8 x i16> %r4, i16 %6, i32 5
+  %r6 = insertelement <8 x i16> %r5, i16 %7, i32 6
+  %r7 = insertelement <8 x i16> %r6, i16 %8, i32 7
+  ret <8 x i16> %r7 ; return the fully built vector so lanes 4-7 stay live; regenerate the assertions with update_test_checks.py
+}
+
+define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(<4 x i32> %a)
+; NO-SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(
+; NO-SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <4 x i32> [[A]], i32 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
+; NO-SVE-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
+; NO-SVE-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i32 [[A0]], 1
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i32 [[A1]], 3
+; NO-SVE-NEXT:    [[TMP3:%.*]] = sdiv i32 [[A2]], 5
+; NO-SVE-NEXT:    [[TMP4:%.*]] = sdiv i32 [[A3]], 7
+; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
+; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP2]], i32 1
+; NO-SVE-NEXT:    [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[TMP3]], i32 2
+; NO-SVE-NEXT:    [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[TMP4]], i32 3
+; NO-SVE-NEXT:    ret <4 x i32> [[R3]]
+;
+; SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const(
+; SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> [[A]], <i32 1, i32 3, i32 5, i32 7>
+; SVE-NEXT:    ret <4 x i32> [[TMP1]]
+;
+{
+  %a0 = extractelement <4 x i32> %a, i32 0
+  %a1 = extractelement <4 x i32> %a, i32 1
+  %a2 = extractelement <4 x i32> %a, i32 2
+  %a3 = extractelement <4 x i32> %a, i32 3
+  %1 = sdiv i32 %a0, 1
+  %2 = sdiv i32 %a1, 3
+  %3 = sdiv i32 %a2, 5
+  %4 = sdiv i32 %a3, 7
+  %r0 = insertelement <4 x i32> poison, i32 %1, i32 0
+  %r1 = insertelement <4 x i32> %r0, i32 %2, i32 1
+  %r2 = insertelement <4 x i32> %r1, i32 %3, i32 2
+  %r3 = insertelement <4 x i32> %r2, i32 %4, i32 3
+  ret <4 x i32> %r3
+}
+
+define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const(<4 x i32> %a)
+; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const(
+; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> [[A]], <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+{
+  %a0 = extractelement <4 x i32> %a, i32 0
+  %a1 = extractelement <4 x i32> %a, i32 1
+  %a2 = extractelement <4 x i32> %a, i32 2
+  %a3 = extractelement <4 x i32> %a, i32 3
+  %1 = sdiv i32 %a0, 5
+  %2 = sdiv i32 %a1, 5
+  %3 = sdiv i32 %a2, 5
+  %4 = sdiv i32 %a3, 5
+  %r0 = insertelement <4 x i32> poison, i32 %1, i32 0
+  %r1 = insertelement <4 x i32> %r0, i32 %2, i32 1
+  %r2 = insertelement <4 x i32> %r1, i32 %3, i32 2
+  %r3 = insertelement <4 x i32> %r2, i32 %4, i32 3
+  ret <4 x i32> %r3
+}
+
+define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const_pow2(<4 x i32> %a)
+; CHECK-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_uniform_const_pow2(
+; CHECK-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> [[A]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
+;
+{
+  %a0 = extractelement <4 x i32> %a, i32 0
+  %a1 = extractelement <4 x i32> %a, i32 1
+  %a2 = extractelement <4 x i32> %a, i32 2
+  %a3 = extractelement <4 x i32> %a, i32 3
+  %1 = sdiv i32 %a0, 4
+  %2 = sdiv i32 %a1, 4
+  %3 = sdiv i32 %a2, 4
+  %4 = sdiv i32 %a3, 4
+  %r0 = insertelement <4 x i32> poison, i32 %1, i32 0
+  %r1 = insertelement <4 x i32> %r0, i32 %2, i32 1
+  %r2 = insertelement <4 x i32> %r1, i32 %3, i32 2
+  %r3 = insertelement <4 x i32> %r2, i32 %4, i32 3
+  ret <4 x i32> %r3
+}
+
+define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(<4 x i32> %a)
+; NO-SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(
+; NO-SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; NO-SVE-NEXT:    [[A0:%.*]] = extractelement <4 x i32> [[A]], i32 0
+; NO-SVE-NEXT:    [[A1:%.*]] = extractelement <4 x i32> [[A]], i32 1
+; NO-SVE-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
+; NO-SVE-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A]], i32 3
+; NO-SVE-NEXT:    [[TMP1:%.*]] = sdiv i32 [[A0]], 1
+; NO-SVE-NEXT:    [[TMP2:%.*]] = sdiv i32 [[A1]], 2
+; NO-SVE-NEXT:    [[TMP3:%.*]] = sdiv i32 [[A2]], 4
+; NO-SVE-NEXT:    [[TMP4:%.*]] = sdiv i32 [[A3]], 8
+; NO-SVE-NEXT:    [[R0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
+; NO-SVE-NEXT:    [[R1:%.*]] = insertelement <4 x i32> [[R0]], i32 [[TMP2]], i32 1
+; NO-SVE-NEXT:    [[R2:%.*]] = insertelement <4 x i32> [[R1]], i32 [[TMP3]], i32 2
+; NO-SVE-NEXT:    [[R3:%.*]] = insertelement <4 x i32> [[R2]], i32 [[TMP4]], i32 3
+; NO-SVE-NEXT:    ret <4 x i32> [[R3]]
+;
+; SVE-LABEL: define <4 x i32> @slp_v4i32_Op1_unknown_Op2_const_pow2(
+; SVE-SAME: <4 x i32> [[A:%.*]]) #[[ATTR0]] {
+; SVE-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> [[A]], <i32 1, i32 2, i32 4, i32 8>
+; SVE-NEXT:    ret <4 x i32> [[TMP1]]
+;
+{
+  %a0 = extractelement <4 x i32> %a, i32 0
+  %a1 = extractelement <4 x i32> %a, i32 1
+  %a2 = extractelement <4 x i32> %a, i32 2
+  %a3 = extractelement <4 x i32> %a, i32 3
+  %1 = sdiv i32 %a0, 1
+  %2 = sdiv i32 %a1, 2
+  %3 = sdiv i32 %a2, 4
+  %4 = sdiv i32 %a3, 8
+  %r0 = insertelement <4 x i32> poison, i32 %1, i32 0
+  %r1 = insertelement <4 x i32> %r0, i32 %2, i32 1
+  %r2 = insertelement <4 x i32> %r1, i32 %3, i32 2
+  %r3 = insertelement <4 x i32> %r2, i32 %4, i32 3
+  ret <4 x i32> %r3
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll
new file mode 100644
index 000000000000000..eb8dd72e0304d91
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll
@@ -0,0 +1,128 @@
+; RUN: opt < %s -mtriple=s390x-unknown-linux -mcpu=z16 -S -passes=slp-vectorizer \
+; RUN:   -pass-remarks-output=%t | FileCheck %s
+; RUN: cat %t | FileCheck -check-prefix=REMARK %s
+;
+; NB! This is a pre-commit version (for #112491) with current codegen and remarks.
+;
+; Test functions that (at least currently) only get vectorized if the
+; insertion cost for an element load is counted as free.
+
+; This function needs the free element load to be recognized in SLP
+; getGatherCost().
+define void @fun0(ptr nocapture %0, double %1) {
+; CHECK-LABEL: define void @fun0(
+; CHECK:         fmul double
+; CHECK:         call double @llvm.fmuladd.f64(
+; CHECK-NEXT:    call double @llvm.fmuladd.f64(
+; CHECK-NEXT:    call double @llvm.sqrt.f64(
+; CHECK:         fmul double
+; CHECK:         call double @llvm.fmuladd.f64(
+; CHECK-NEXT:    call double @llvm.fmuladd.f64(
+; CHECK-NEXT:    call double @llvm.sqrt.f64(
+;
+; REMARK-LABEL: Function: fun0
+; REMARK: Args:
+; REMARK-NEXT: - String:          'List vectorization was possible but not beneficial with cost '
+; REMARK-NEXT: - Cost:            '0'
+
+  %3 = fmul double %1, 2.000000e+00
+  %4 = tail call double @llvm.fmuladd.f64(double %3, double %3, double 0.000000e+00)
+  %5 = tail call double @llvm.fmuladd.f64(double %3, double %3, double %4)
+  %sqrt1 = tail call double @llvm.sqrt.f64(double %5)
+  %6 = load double, ptr %0, align 8
+  %7 = fmul double %6, 2.000000e+00
+  %8 = tail call double @llvm.fmuladd.f64(double %7, double %7, double 0.000000e+00)
+  %9 = tail call double @llvm.fmuladd.f64(double %7, double %7, double %8)
+  %sqrt = tail call double @llvm.sqrt.f64(double %9)
+  %10 = fadd double %sqrt1, %sqrt
+  store double %10, ptr %0, align 8
+  ret void
+}
+
+; This function needs the element-load to be recognized in SystemZ
+; getVectorInstrCost().
+define void @fun1(double %0) {
+; CHECK-LABEL: define void @fun1(
+; CHECK:         phi double
+; CHECK-NEXT:    phi double
+; CHECK-NEXT:    phi double
+; CHECK-NEXT:    phi double
+; CHECK-NEXT:    phi double
+; CHECK-NEXT:    phi double
+; CHECK-NEXT:    fsub double
+; CHECK-NEXT:    fsub double
+; CHECK-NEXT:    fmul double
+; CHECK-NEXT:    fmul double
+; CHECK-NEXT:    fsub double
+; CHECK-NEXT:    fsub double
+; CHECK-NEXT:    call double @llvm.fmuladd.f64(
+; CHECK-NEXT:    call double @llvm.fmuladd.f64(
+; CHECK-NEXT:    fsub double
+; CHECK-NEXT:    fsub double
+; CHECK-NEXT:    call double @llvm.fmuladd.f64(
+; CHECK-NEXT:    call double @llvm.fmuladd.f64(
+; CHECK:         fcmp olt double
+; CHECK-NEXT:    fcmp olt double
+; CHECK-NEXT:    or i1
+;
+; REMARK-LABEL: Function: fun1
+; REMARK: Args:
+; REMARK:      - String:          'List vectorization was possible but not beneficial with cost '
+; REMARK-NEXT: - Cost:            '0'
+
+  br label %2
+
+2:
+  %3 = phi double [ poison, %1 ], [ poison, %2 ]
+  %4 = phi double [ undef, %1 ], [ poison, %2 ]
+  %5 = phi double [ 0.000000e+00, %1 ], [ poison, %2 ]
+  %6 = phi double [ 0.000000e+00, %1 ], [ poison, %2 ]
+  %7 = phi double [ 0.000000e+00, %1 ], [ poison, %2 ]
+  %8 = phi double [ 0.000000e+00, %1 ], [ %21, %2 ]
+  %9 = fsub double 0.000000e+00, %8
+  %10 = fsub double 0.000000e+00, %7
+  %11 = fmul double %9, 0.000000e+00
+  %12 = fmul double %10, 0.000000e+00
+  %13 = fsub double 0.000000e+00, %6
+  %14 = fsub double 0.000000e+00, %5
+  %15 = tail call double @llvm.fmuladd.f64(double %13, double %13, double %11)
+  %16 = tail call double @llvm.fmuladd.f64(double %14, double %14, double %12)
+  %17 = fsub double 0.000000e+00, %4
+  %18 = fsub double 0.000000e+00, %3
+  %19 = tail call double @llvm.fmuladd.f64(double %17, double %17, double %15)
+  %20 = tail call double @llvm.fmuladd.f64(double %18, double %18, double %16)
+  %21 = load double, ptr null, align 8
+  %22 = fcmp olt double %19, %0
+  %23 = fcmp olt double %20, 0.000000e+00
+  %24 = or i1 %23, %22
+  br label %2
+}
+
+declare double @llvm.fmuladd.f64(double, double, double)
+
+; This should *not* be vectorized (the checks below record that it currently is), as
+; the vector insertion isn't free, which is recognized in SystemZTTImpl::getScalarizationOverhead().
+define void @fun2(ptr %0, ptr %Dst) {
+; CHECK-LABEL: define void @fun2(
+; CHECK: insertelement
+; CHECK: store <2 x i64>
+;
+; REMARK-LABEL: Function: fun2
+; REMARK: Args:
+; REMARK-NEXT: - String:          'Stores SLP vectorized with cost '
+; REMARK-NEXT: - Cost:            '-1'
+
+  %3 = load i64, ptr %0, align 8
+  %4 = icmp eq i64 %3, 0
+  br i1 %4, label %5, label %6
+
+5:
+  ret void
+
+6:
+  %7 = getelementptr i8, ptr %Dst, i64 24
+  store i64 %3, ptr %7, align 8
+  %8 = getelementptr i8, ptr %Dst, i64 16
+  store i64 0, ptr %8, align 8
+  br label %5
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll
new file mode 100644
index 000000000000000..f23043f0c47f4a3
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll
@@ -0,0 +1,601 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -mtriple=x86_64-- -passes=slp-vectorizer -S -mattr=+avx2 | FileCheck %s --check-prefix=CHECK
+; RUN: opt < %s -mtriple=x86_64-- -passes=slp-vectorizer -S -mattr=+avx2 -mattr=+f16c | FileCheck %s --check-prefix=CHECK-F16C
+; RUN: opt < %s -mtriple=x86_64-- -passes=slp-vectorizer -S -mattr=+avx512f | FileCheck %s --check-prefix=CHECK-AVX512
+
+define void @fpext_v4xf16_v4xf32(ptr %s0, ptr %d0) {
+; CHECK-LABEL: define void @fpext_v4xf16_v4xf32(
+; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[S1:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 1
+; CHECK-NEXT:    [[S2:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 2
+; CHECK-NEXT:    [[S3:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 3
+; CHECK-NEXT:    [[L0:%.*]] = load half, ptr [[S0]], align 2
+; CHECK-NEXT:    [[L1:%.*]] = load half, ptr [[S1]], align 2
+; CHECK-NEXT:    [[L2:%.*]] = load half, ptr [[S2]], align 2
+; CHECK-NEXT:    [[L3:%.*]] = load half, ptr [[S3]], align 2
+; CHECK-NEXT:    [[E0:%.*]] = fpext half [[L0]] to float
+; CHECK-NEXT:    [[E1:%.*]] = fpext half [[L1]] to float
+; CHECK-NEXT:    [[E2:%.*]] = fpext half [[L2]] to float
+; CHECK-NEXT:    [[E3:%.*]] = fpext half [[L3]] to float
+; CHECK-NEXT:    [[D1:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 1
+; CHECK-NEXT:    [[D2:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 2
+; CHECK-NEXT:    [[D3:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 3
+; CHECK-NEXT:    store float [[E0]], ptr [[D0]], align 8
+; CHECK-NEXT:    store float [[E1]], ptr [[D1]], align 8
+; CHECK-NEXT:    store float [[E2]], ptr [[D2]], align 8
+; CHECK-NEXT:    store float [[E3]], ptr [[D3]], align 8
+; CHECK-NEXT:    ret void
+;
+; CHECK-F16C-LABEL: define void @fpext_v4xf16_v4xf32(
+; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-F16C-NEXT:    [[TMP1:%.*]] = load <4 x half>, ptr [[S0]], align 2
+; CHECK-F16C-NEXT:    [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x float>
+; CHECK-F16C-NEXT:    store <4 x float> [[TMP2]], ptr [[D0]], align 8
+; CHECK-F16C-NEXT:    ret void
+;
+; CHECK-AVX512-LABEL: define void @fpext_v4xf16_v4xf32(
+; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-AVX512-NEXT:    [[TMP1:%.*]] = load <4 x half>, ptr [[S0]], align 2
+; CHECK-AVX512-NEXT:    [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x float>
+; CHECK-AVX512-NEXT:    store <4 x float> [[TMP2]], ptr [[D0]], align 8
+; CHECK-AVX512-NEXT:    ret void
+;
+  %s1 = getelementptr inbounds half, ptr %s0, i64 1
+  %s2 = getelementptr inbounds half, ptr %s0, i64 2
+  %s3 = getelementptr inbounds half, ptr %s0, i64 3
+  %l0 = load half, ptr %s0, align 2
+  %l1 = load half, ptr %s1, align 2
+  %l2 = load half, ptr %s2, align 2
+  %l3 = load half, ptr %s3, align 2
+
+  %e0 = fpext half %l0 to float
+  %e1 = fpext half %l1 to float
+  %e2 = fpext half %l2 to float
+  %e3 = fpext half %l3 to float
+
+  %d1 = getelementptr inbounds float, ptr %d0, i64 1
+  %d2 = getelementptr inbounds float, ptr %d0, i64 2
+  %d3 = getelementptr inbounds float, ptr %d0, i64 3
+  store float %e0, ptr %d0, align 8
+  store float %e1, ptr %d1, align 8
+  store float %e2, ptr %d2, align 8
+  store float %e3, ptr %d3, align 8
+  ret void
+}
+
+define void @fpext_v4xf16_v4xf64(ptr %s0, ptr %d0) {
+; CHECK-LABEL: define void @fpext_v4xf16_v4xf64(
+; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[S1:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 1
+; CHECK-NEXT:    [[S2:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 2
+; CHECK-NEXT:    [[S3:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 3
+; CHECK-NEXT:    [[L0:%.*]] = load half, ptr [[S0]], align 2
+; CHECK-NEXT:    [[L1:%.*]] = load half, ptr [[S1]], align 2
+; CHECK-NEXT:    [[L2:%.*]] = load half, ptr [[S2]], align 2
+; CHECK-NEXT:    [[L3:%.*]] = load half, ptr [[S3]], align 2
+; CHECK-NEXT:    [[E0:%.*]] = fpext half [[L0]] to double
+; CHECK-NEXT:    [[E1:%.*]] = fpext half [[L1]] to double
+; CHECK-NEXT:    [[E2:%.*]] = fpext half [[L2]] to double
+; CHECK-NEXT:    [[E3:%.*]] = fpext half [[L3]] to double
+; CHECK-NEXT:    [[D1:%.*]] = getelementptr inbounds double, ptr [[D0]], i64 1
+; CHECK-NEXT:    [[D2:%.*]] = getelementptr inbounds double, ptr [[D0]], i64 2
+; CHECK-NEXT:    [[D3:%.*]] = getelementptr inbounds double, ptr [[D0]], i64 3
+; CHECK-NEXT:    store double [[E0]], ptr [[D0]], align 8
+; CHECK-NEXT:    store double [[E1]], ptr [[D1]], align 8
+; CHECK-NEXT:    store double [[E2]], ptr [[D2]], align 8
+; CHECK-NEXT:    store double [[E3]], ptr [[D3]], align 8
+; CHECK-NEXT:    ret void
+;
+; CHECK-F16C-LABEL: define void @fpext_v4xf16_v4xf64(
+; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-F16C-NEXT:    [[TMP1:%.*]] = load <4 x half>, ptr [[S0]], align 2
+; CHECK-F16C-NEXT:    [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x double>
+; CHECK-F16C-NEXT:    store <4 x double> [[TMP2]], ptr [[D0]], align 8
+; CHECK-F16C-NEXT:    ret void
+;
+; CHECK-AVX512-LABEL: define void @fpext_v4xf16_v4xf64(
+; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-AVX512-NEXT:    [[TMP1:%.*]] = load <4 x half>, ptr [[S0]], align 2
+; CHECK-AVX512-NEXT:    [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x double>
+; CHECK-AVX512-NEXT:    store <4 x double> [[TMP2]], ptr [[D0]], align 8
+; CHECK-AVX512-NEXT:    ret void
+;
+  %s1 = getelementptr inbounds half, ptr %s0, i64 1
+  %s2 = getelementptr inbounds half, ptr %s0, i64 2
+  %s3 = getelementptr inbounds half, ptr %s0, i64 3
+  %l0 = load half, ptr %s0, align 2
+  %l1 = load half, ptr %s1, align 2
+  %l2 = load half, ptr %s2, align 2
+  %l3 = load half, ptr %s3, align 2
+
+  %e0 = fpext half %l0 to double
+  %e1 = fpext half %l1 to double
+  %e2 = fpext half %l2 to double
+  %e3 = fpext half %l3 to double
+
+  %d1 = getelementptr inbounds double, ptr %d0, i64 1
+  %d2 = getelementptr inbounds double, ptr %d0, i64 2
+  %d3 = getelementptr inbounds double, ptr %d0, i64 3
+  store double %e0, ptr %d0, align 8
+  store double %e1, ptr %d1, align 8
+  store double %e2, ptr %d2, align 8
+  store double %e3, ptr %d3, align 8
+  ret void
+}
+
+define void @fpext_v16xf16_v16xf32(ptr %s0, ptr %d0) {
+; CHECK-LABEL: define void @fpext_v16xf16_v16xf32(
+; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[S1:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 1
+; CHECK-NEXT:    [[S2:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 2
+; CHECK-NEXT:    [[S3:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 3
+; CHECK-NEXT:    [[S4:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 4
+; CHECK-NEXT:    [[S5:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 5
+; CHECK-NEXT:    [[S6:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 6
+; CHECK-NEXT:    [[S7:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 7
+; CHECK-NEXT:    [[S8:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 8
+; CHECK-NEXT:    [[S9:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 9
+; CHECK-NEXT:    [[S10:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 10
+; CHECK-NEXT:    [[S11:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 11
+; CHECK-NEXT:    [[S12:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 12
+; CHECK-NEXT:    [[S13:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 13
+; CHECK-NEXT:    [[S14:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 14
+; CHECK-NEXT:    [[S15:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 15
+; CHECK-NEXT:    [[L0:%.*]] = load half, ptr [[S0]], align 2
+; CHECK-NEXT:    [[L1:%.*]] = load half, ptr [[S1]], align 2
+; CHECK-NEXT:    [[L2:%.*]] = load half, ptr [[S2]], align 2
+; CHECK-NEXT:    [[L3:%.*]] = load half, ptr [[S3]], align 2
+; CHECK-NEXT:    [[L4:%.*]] = load half, ptr [[S4]], align 2
+; CHECK-NEXT:    [[L5:%.*]] = load half, ptr [[S5]], align 2
+; CHECK-NEXT:    [[L6:%.*]] = load half, ptr [[S6]], align 2
+; CHECK-NEXT:    [[L7:%.*]] = load half, ptr [[S7]], align 2
+; CHECK-NEXT:    [[L8:%.*]] = load half, ptr [[S8]], align 2
+; CHECK-NEXT:    [[L9:%.*]] = load half, ptr [[S9]], align 2
+; CHECK-NEXT:    [[L10:%.*]] = load half, ptr [[S10]], align 2
+; CHECK-NEXT:    [[L11:%.*]] = load half, ptr [[S11]], align 2
+; CHECK-NEXT:    [[L12:%.*]] = load half, ptr [[S12]], align 2
+; CHECK-NEXT:    [[L13:%.*]] = load half, ptr [[S13]], align 2
+; CHECK-NEXT:    [[L14:%.*]] = load half, ptr [[S14]], align 2
+; CHECK-NEXT:    [[L15:%.*]] = load half, ptr [[S15]], align 2
+; CHECK-NEXT:    [[E0:%.*]] = fpext half [[L0]] to float
+; CHECK-NEXT:    [[E1:%.*]] = fpext half [[L1]] to float
+; CHECK-NEXT:    [[E2:%.*]] = fpext half [[L2]] to float
+; CHECK-NEXT:    [[E3:%.*]] = fpext half [[L3]] to float
+; CHECK-NEXT:    [[E4:%.*]] = fpext half [[L4]] to float
+; CHECK-NEXT:    [[E5:%.*]] = fpext half [[L5]] to float
+; CHECK-NEXT:    [[E6:%.*]] = fpext half [[L6]] to float
+; CHECK-NEXT:    [[E7:%.*]] = fpext half [[L7]] to float
+; CHECK-NEXT:    [[E8:%.*]] = fpext half [[L8]] to float
+; CHECK-NEXT:    [[E9:%.*]] = fpext half [[L9]] to float
+; CHECK-NEXT:    [[E10:%.*]] = fpext half [[L10]] to float
+; CHECK-NEXT:    [[E11:%.*]] = fpext half [[L11]] to float
+; CHECK-NEXT:    [[E12:%.*]] = fpext half [[L12]] to float
+; CHECK-NEXT:    [[E13:%.*]] = fpext half [[L13]] to float
+; CHECK-NEXT:    [[E14:%.*]] = fpext half [[L14]] to float
+; CHECK-NEXT:    [[E15:%.*]] = fpext half [[L15]] to float
+; CHECK-NEXT:    [[D1:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 1
+; CHECK-NEXT:    [[D2:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 2
+; CHECK-NEXT:    [[D15:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 3
+; CHECK-NEXT:    [[D4:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 4
+; CHECK-NEXT:    [[D5:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 5
+; CHECK-NEXT:    [[D6:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 6
+; CHECK-NEXT:    [[D7:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 7
+; CHECK-NEXT:    [[D8:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 8
+; CHECK-NEXT:    [[D9:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 9
+; CHECK-NEXT:    [[D10:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 10
+; CHECK-NEXT:    [[D11:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 11
+; CHECK-NEXT:    [[D12:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 12
+; CHECK-NEXT:    [[D13:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 13
+; CHECK-NEXT:    [[D14:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 14
+; CHECK-NEXT:    [[D16:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 15
+; CHECK-NEXT:    store float [[E0]], ptr [[D0]], align 8
+; CHECK-NEXT:    store float [[E1]], ptr [[D1]], align 8
+; CHECK-NEXT:    store float [[E2]], ptr [[D2]], align 8
+; CHECK-NEXT:    store float [[E3]], ptr [[D15]], align 8
+; CHECK-NEXT:    store float [[E4]], ptr [[D4]], align 8
+; CHECK-NEXT:    store float [[E5]], ptr [[D5]], align 8
+; CHECK-NEXT:    store float [[E6]], ptr [[D6]], align 8
+; CHECK-NEXT:    store float [[E7]], ptr [[D7]], align 8
+; CHECK-NEXT:    store float [[E8]], ptr [[D8]], align 8
+; CHECK-NEXT:    store float [[E9]], ptr [[D9]], align 8
+; CHECK-NEXT:    store float [[E10]], ptr [[D10]], align 8
+; CHECK-NEXT:    store float [[E11]], ptr [[D11]], align 8
+; CHECK-NEXT:    store float [[E12]], ptr [[D12]], align 8
+; CHECK-NEXT:    store float [[E13]], ptr [[D13]], align 8
+; CHECK-NEXT:    store float [[E14]], ptr [[D14]], align 8
+; CHECK-NEXT:    store float [[E15]], ptr [[D16]], align 8
+; CHECK-NEXT:    ret void
+;
+; CHECK-F16C-LABEL: define void @fpext_v16xf16_v16xf32(
+; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-F16C-NEXT:    [[S8:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 8
+; CHECK-F16C-NEXT:    [[D8:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 8
+; CHECK-F16C-NEXT:    [[TMP1:%.*]] = load <8 x half>, ptr [[S0]], align 2
+; CHECK-F16C-NEXT:    [[TMP2:%.*]] = fpext <8 x half> [[TMP1]] to <8 x float>
+; CHECK-F16C-NEXT:    [[TMP3:%.*]] = load <8 x half>, ptr [[S8]], align 2
+; CHECK-F16C-NEXT:    [[TMP4:%.*]] = fpext <8 x half> [[TMP3]] to <8 x float>
+; CHECK-F16C-NEXT:    store <8 x float> [[TMP2]], ptr [[D0]], align 8
+; CHECK-F16C-NEXT:    store <8 x float> [[TMP4]], ptr [[D8]], align 8
+; CHECK-F16C-NEXT:    ret void
+;
+; CHECK-AVX512-LABEL: define void @fpext_v16xf16_v16xf32(
+; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-AVX512-NEXT:    [[TMP1:%.*]] = load <16 x half>, ptr [[S0]], align 2
+; CHECK-AVX512-NEXT:    [[TMP2:%.*]] = fpext <16 x half> [[TMP1]] to <16 x float>
+; CHECK-AVX512-NEXT:    store <16 x float> [[TMP2]], ptr [[D0]], align 8
+; CHECK-AVX512-NEXT:    ret void
+;
+  %s1 = getelementptr inbounds half, ptr %s0, i64 1
+  %s2 = getelementptr inbounds half, ptr %s0, i64 2
+  %s3 = getelementptr inbounds half, ptr %s0, i64 3
+  %s4 = getelementptr inbounds half, ptr %s0, i64 4
+  %s5 = getelementptr inbounds half, ptr %s0, i64 5
+  %s6 = getelementptr inbounds half, ptr %s0, i64 6
+  %s7 = getelementptr inbounds half, ptr %s0, i64 7
+  %s8 = getelementptr inbounds half, ptr %s0, i64 8
+  %s9 = getelementptr inbounds half, ptr %s0, i64 9
+  %s10 = getelementptr inbounds half, ptr %s0, i64 10
+  %s11 = getelementptr inbounds half, ptr %s0, i64 11
+  %s12 = getelementptr inbounds half, ptr %s0, i64 12
+  %s13 = getelementptr inbounds half, ptr %s0, i64 13
+  %s14 = getelementptr inbounds half, ptr %s0, i64 14
+  %s15 = getelementptr inbounds half, ptr %s0, i64 15
+  %l0 = load half, ptr %s0, align 2
+  %l1 = load half, ptr %s1, align 2
+  %l2 = load half, ptr %s2, align 2
+  %l3 = load half, ptr %s3, align 2
+  %l4 = load half, ptr %s4, align 2
+  %l5 = load half, ptr %s5, align 2
+  %l6 = load half, ptr %s6, align 2
+  %l7 = load half, ptr %s7, align 2
+  %l8 = load half, ptr %s8, align 2
+  %l9 = load half, ptr %s9, align 2
+  %l10 = load half, ptr %s10, align 2
+  %l11 = load half, ptr %s11, align 2
+  %l12 = load half, ptr %s12, align 2
+  %l13 = load half, ptr %s13, align 2
+  %l14 = load half, ptr %s14, align 2
+  %l15 = load half, ptr %s15, align 2
+
+  %e0 = fpext half %l0 to float
+  %e1 = fpext half %l1 to float
+  %e2 = fpext half %l2 to float
+  %e3 = fpext half %l3 to float
+  %e4 = fpext half %l4 to float
+  %e5 = fpext half %l5 to float
+  %e6 = fpext half %l6 to float
+  %e7 = fpext half %l7 to float
+  %e8 = fpext half %l8 to float
+  %e9 = fpext half %l9 to float
+  %e10 = fpext half %l10 to float
+  %e11 = fpext half %l11 to float
+  %e12 = fpext half %l12 to float
+  %e13 = fpext half %l13 to float
+  %e14 = fpext half %l14 to float
+  %e15 = fpext half %l15 to float
+
+  %d1 = getelementptr inbounds float, ptr %d0, i64 1
+  %d2 = getelementptr inbounds float, ptr %d0, i64 2
+  %d3 = getelementptr inbounds float, ptr %d0, i64 3
+  %d4 = getelementptr inbounds float, ptr %d0, i64 4
+  %d5 = getelementptr inbounds float, ptr %d0, i64 5
+  %d6 = getelementptr inbounds float, ptr %d0, i64 6
+  %d7 = getelementptr inbounds float, ptr %d0, i64 7
+  %d8 = getelementptr inbounds float, ptr %d0, i64 8
+  %d9 = getelementptr inbounds float, ptr %d0, i64 9
+  %d10 = getelementptr inbounds float, ptr %d0, i64 10
+  %d11 = getelementptr inbounds float, ptr %d0, i64 11
+  %d12 = getelementptr inbounds float, ptr %d0, i64 12
+  %d13 = getelementptr inbounds float, ptr %d0, i64 13
+  %d14 = getelementptr inbounds float, ptr %d0, i64 14
+  %d15 = getelementptr inbounds float, ptr %d0, i64 15
+  store float %e0, ptr %d0, align 8
+  store float %e1, ptr %d1, align 8
+  store float %e2, ptr %d2, align 8
+  store float %e3, ptr %d3, align 8
+  store float %e4, ptr %d4, align 8
+  store float %e5, ptr %d5, align 8
+  store float %e6, ptr %d6, align 8
+  store float %e7, ptr %d7, align 8
+  store float %e8, ptr %d8, align 8
+  store float %e9, ptr %d9, align 8
+  store float %e10, ptr %d10, align 8
+  store float %e11, ptr %d11, align 8
+  store float %e12, ptr %d12, align 8
+  store float %e13, ptr %d13, align 8
+  store float %e14, ptr %d14, align 8
+  store float %e15, ptr %d15, align 8
+  ret void
+}
+
+define void @fpround_v4xf32_v4xf16(ptr %s0, ptr %d0) {
+; CHECK-LABEL: define void @fpround_v4xf32_v4xf16(
+; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[S1:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 1
+; CHECK-NEXT:    [[S2:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 2
+; CHECK-NEXT:    [[S3:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 3
+; CHECK-NEXT:    [[L0:%.*]] = load float, ptr [[S0]], align 4
+; CHECK-NEXT:    [[L1:%.*]] = load float, ptr [[S1]], align 4
+; CHECK-NEXT:    [[L2:%.*]] = load float, ptr [[S2]], align 4
+; CHECK-NEXT:    [[L3:%.*]] = load float, ptr [[S3]], align 4
+; CHECK-NEXT:    [[T0:%.*]] = fptrunc float [[L0]] to half
+; CHECK-NEXT:    [[T1:%.*]] = fptrunc float [[L1]] to half
+; CHECK-NEXT:    [[T2:%.*]] = fptrunc float [[L2]] to half
+; CHECK-NEXT:    [[T3:%.*]] = fptrunc float [[L3]] to half
+; CHECK-NEXT:    [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1
+; CHECK-NEXT:    [[D2:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 2
+; CHECK-NEXT:    [[D3:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 3
+; CHECK-NEXT:    store half [[T0]], ptr [[D0]], align 2
+; CHECK-NEXT:    store half [[T1]], ptr [[D1]], align 2
+; CHECK-NEXT:    store half [[T2]], ptr [[D2]], align 2
+; CHECK-NEXT:    store half [[T3]], ptr [[D3]], align 2
+; CHECK-NEXT:    ret void
+;
+; CHECK-F16C-LABEL: define void @fpround_v4xf32_v4xf16(
+; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-F16C-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[S0]], align 4
+; CHECK-F16C-NEXT:    [[TMP2:%.*]] = fptrunc <4 x float> [[TMP1]] to <4 x half>
+; CHECK-F16C-NEXT:    store <4 x half> [[TMP2]], ptr [[D0]], align 2
+; CHECK-F16C-NEXT:    ret void
+;
+; CHECK-AVX512-LABEL: define void @fpround_v4xf32_v4xf16(
+; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-AVX512-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[S0]], align 4
+; CHECK-AVX512-NEXT:    [[TMP2:%.*]] = fptrunc <4 x float> [[TMP1]] to <4 x half>
+; CHECK-AVX512-NEXT:    store <4 x half> [[TMP2]], ptr [[D0]], align 2
+; CHECK-AVX512-NEXT:    ret void
+;
+  %s1 = getelementptr inbounds float, ptr %s0, i64 1
+  %s2 = getelementptr inbounds float, ptr %s0, i64 2
+  %s3 = getelementptr inbounds float, ptr %s0, i64 3
+  %l0 = load float, ptr %s0, align 4
+  %l1 = load float, ptr %s1, align 4
+  %l2 = load float, ptr %s2, align 4
+  %l3 = load float, ptr %s3, align 4
+
+  %t0 = fptrunc float %l0 to half
+  %t1 = fptrunc float %l1 to half
+  %t2 = fptrunc float %l2 to half
+  %t3 = fptrunc float %l3 to half
+
+  %d1 = getelementptr inbounds half, ptr %d0, i64 1
+  %d2 = getelementptr inbounds half, ptr %d0, i64 2
+  %d3 = getelementptr inbounds half, ptr %d0, i64 3
+  store half %t0, ptr %d0, align 2
+  store half %t1, ptr %d1, align 2
+  store half %t2, ptr %d2, align 2
+  store half %t3, ptr %d3, align 2
+  ret void
+}
+
+define void @fpround_v16xf32_v16xf16(ptr %s0, ptr %d0) {
+; CHECK-LABEL: define void @fpround_v16xf32_v16xf16(
+; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[S1:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 1
+; CHECK-NEXT:    [[S2:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 2
+; CHECK-NEXT:    [[S3:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 3
+; CHECK-NEXT:    [[S4:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 4
+; CHECK-NEXT:    [[S5:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 5
+; CHECK-NEXT:    [[S6:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 6
+; CHECK-NEXT:    [[S7:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 7
+; CHECK-NEXT:    [[S8:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 8
+; CHECK-NEXT:    [[S9:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 9
+; CHECK-NEXT:    [[S10:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 10
+; CHECK-NEXT:    [[S11:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 11
+; CHECK-NEXT:    [[S12:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 12
+; CHECK-NEXT:    [[S13:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 13
+; CHECK-NEXT:    [[S14:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 14
+; CHECK-NEXT:    [[S15:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 15
+; CHECK-NEXT:    [[L0:%.*]] = load float, ptr [[S0]], align 4
+; CHECK-NEXT:    [[L1:%.*]] = load float, ptr [[S1]], align 4
+; CHECK-NEXT:    [[L2:%.*]] = load float, ptr [[S2]], align 4
+; CHECK-NEXT:    [[L3:%.*]] = load float, ptr [[S3]], align 4
+; CHECK-NEXT:    [[L4:%.*]] = load float, ptr [[S4]], align 4
+; CHECK-NEXT:    [[L5:%.*]] = load float, ptr [[S5]], align 4
+; CHECK-NEXT:    [[L6:%.*]] = load float, ptr [[S6]], align 4
+; CHECK-NEXT:    [[L7:%.*]] = load float, ptr [[S7]], align 4
+; CHECK-NEXT:    [[L8:%.*]] = load float, ptr [[S8]], align 4
+; CHECK-NEXT:    [[L9:%.*]] = load float, ptr [[S9]], align 4
+; CHECK-NEXT:    [[L10:%.*]] = load float, ptr [[S10]], align 4
+; CHECK-NEXT:    [[L11:%.*]] = load float, ptr [[S11]], align 4
+; CHECK-NEXT:    [[L12:%.*]] = load float, ptr [[S12]], align 4
+; CHECK-NEXT:    [[L13:%.*]] = load float, ptr [[S13]], align 4
+; CHECK-NEXT:    [[L14:%.*]] = load float, ptr [[S14]], align 4
+; CHECK-NEXT:    [[L15:%.*]] = load float, ptr [[S15]], align 4
+; CHECK-NEXT:    [[T0:%.*]] = fptrunc float [[L0]] to half
+; CHECK-NEXT:    [[T1:%.*]] = fptrunc float [[L1]] to half
+; CHECK-NEXT:    [[T2:%.*]] = fptrunc float [[L2]] to half
+; CHECK-NEXT:    [[T3:%.*]] = fptrunc float [[L3]] to half
+; CHECK-NEXT:    [[T4:%.*]] = fptrunc float [[L4]] to half
+; CHECK-NEXT:    [[T5:%.*]] = fptrunc float [[L5]] to half
+; CHECK-NEXT:    [[T6:%.*]] = fptrunc float [[L6]] to half
+; CHECK-NEXT:    [[T7:%.*]] = fptrunc float [[L7]] to half
+; CHECK-NEXT:    [[T8:%.*]] = fptrunc float [[L8]] to half
+; CHECK-NEXT:    [[T9:%.*]] = fptrunc float [[L9]] to half
+; CHECK-NEXT:    [[T10:%.*]] = fptrunc float [[L10]] to half
+; CHECK-NEXT:    [[T11:%.*]] = fptrunc float [[L11]] to half
+; CHECK-NEXT:    [[T12:%.*]] = fptrunc float [[L12]] to half
+; CHECK-NEXT:    [[T13:%.*]] = fptrunc float [[L13]] to half
+; CHECK-NEXT:    [[T14:%.*]] = fptrunc float [[L14]] to half
+; CHECK-NEXT:    [[T15:%.*]] = fptrunc float [[L15]] to half
+; CHECK-NEXT:    [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1
+; CHECK-NEXT:    [[D2:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 2
+; CHECK-NEXT:    [[D3:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 3
+; CHECK-NEXT:    [[D4:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 4
+; CHECK-NEXT:    [[D5:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 5
+; CHECK-NEXT:    [[D6:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 6
+; CHECK-NEXT:    [[D7:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 7
+; CHECK-NEXT:    [[D8:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 8
+; CHECK-NEXT:    [[D9:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 9
+; CHECK-NEXT:    [[D10:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 10
+; CHECK-NEXT:    [[D11:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 11
+; CHECK-NEXT:    [[D12:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 12
+; CHECK-NEXT:    [[D13:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 13
+; CHECK-NEXT:    [[D14:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 14
+; CHECK-NEXT:    [[D15:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 15
+; CHECK-NEXT:    store half [[T0]], ptr [[D0]], align 2
+; CHECK-NEXT:    store half [[T1]], ptr [[D1]], align 2
+; CHECK-NEXT:    store half [[T2]], ptr [[D2]], align 2
+; CHECK-NEXT:    store half [[T3]], ptr [[D3]], align 2
+; CHECK-NEXT:    store half [[T4]], ptr [[D4]], align 2
+; CHECK-NEXT:    store half [[T5]], ptr [[D5]], align 2
+; CHECK-NEXT:    store half [[T6]], ptr [[D6]], align 2
+; CHECK-NEXT:    store half [[T7]], ptr [[D7]], align 2
+; CHECK-NEXT:    store half [[T8]], ptr [[D8]], align 2
+; CHECK-NEXT:    store half [[T9]], ptr [[D9]], align 2
+; CHECK-NEXT:    store half [[T10]], ptr [[D10]], align 2
+; CHECK-NEXT:    store half [[T11]], ptr [[D11]], align 2
+; CHECK-NEXT:    store half [[T12]], ptr [[D12]], align 2
+; CHECK-NEXT:    store half [[T13]], ptr [[D13]], align 2
+; CHECK-NEXT:    store half [[T14]], ptr [[D14]], align 2
+; CHECK-NEXT:    store half [[T15]], ptr [[D15]], align 2
+; CHECK-NEXT:    ret void
+;
+; CHECK-F16C-LABEL: define void @fpround_v16xf32_v16xf16(
+; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-F16C-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr [[S0]], align 4
+; CHECK-F16C-NEXT:    [[TMP2:%.*]] = fptrunc <16 x float> [[TMP1]] to <16 x half>
+; CHECK-F16C-NEXT:    store <16 x half> [[TMP2]], ptr [[D0]], align 2
+; CHECK-F16C-NEXT:    ret void
+;
+; CHECK-AVX512-LABEL: define void @fpround_v16xf32_v16xf16(
+; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, ptr [[S0]], align 4
+; CHECK-AVX512-NEXT:    [[TMP2:%.*]] = fptrunc <16 x float> [[TMP1]] to <16 x half>
+; CHECK-AVX512-NEXT:    store <16 x half> [[TMP2]], ptr [[D0]], align 2
+; CHECK-AVX512-NEXT:    ret void
+;
+  %s1 = getelementptr inbounds float, ptr %s0, i64 1
+  %s2 = getelementptr inbounds float, ptr %s0, i64 2
+  %s3 = getelementptr inbounds float, ptr %s0, i64 3
+  %s4 = getelementptr inbounds float, ptr %s0, i64 4
+  %s5 = getelementptr inbounds float, ptr %s0, i64 5
+  %s6 = getelementptr inbounds float, ptr %s0, i64 6
+  %s7 = getelementptr inbounds float, ptr %s0, i64 7
+  %s8 = getelementptr inbounds float, ptr %s0, i64 8
+  %s9 = getelementptr inbounds float, ptr %s0, i64 9
+  %s10 = getelementptr inbounds float, ptr %s0, i64 10
+  %s11 = getelementptr inbounds float, ptr %s0, i64 11
+  %s12 = getelementptr inbounds float, ptr %s0, i64 12
+  %s13 = getelementptr inbounds float, ptr %s0, i64 13
+  %s14 = getelementptr inbounds float, ptr %s0, i64 14
+  %s15 = getelementptr inbounds float, ptr %s0, i64 15
+  %l0 = load float, ptr %s0, align 4
+  %l1 = load float, ptr %s1, align 4
+  %l2 = load float, ptr %s2, align 4
+  %l3 = load float, ptr %s3, align 4
+  %l4 = load float, ptr %s4, align 4
+  %l5 = load float, ptr %s5, align 4
+  %l6 = load float, ptr %s6, align 4
+  %l7 = load float, ptr %s7, align 4
+  %l8 = load float, ptr %s8, align 4
+  %l9 = load float, ptr %s9, align 4
+  %l10 = load float, ptr %s10, align 4
+  %l11 = load float, ptr %s11, align 4
+  %l12 = load float, ptr %s12, align 4
+  %l13 = load float, ptr %s13, align 4
+  %l14 = load float, ptr %s14, align 4
+  %l15 = load float, ptr %s15, align 4
+
+  %t0 = fptrunc float %l0 to half
+  %t1 = fptrunc float %l1 to half
+  %t2 = fptrunc float %l2 to half
+  %t3 = fptrunc float %l3 to half
+  %t4 = fptrunc float %l4 to half
+  %t5 = fptrunc float %l5 to half
+  %t6 = fptrunc float %l6 to half
+  %t7 = fptrunc float %l7 to half
+  %t8 = fptrunc float %l8 to half
+  %t9 = fptrunc float %l9 to half
+  %t10 = fptrunc float %l10 to half
+  %t11 = fptrunc float %l11 to half
+  %t12 = fptrunc float %l12 to half
+  %t13 = fptrunc float %l13 to half
+  %t14 = fptrunc float %l14 to half
+  %t15 = fptrunc float %l15 to half
+
+  %d1 = getelementptr inbounds half, ptr %d0, i64 1
+  %d2 = getelementptr inbounds half, ptr %d0, i64 2
+  %d3 = getelementptr inbounds half, ptr %d0, i64 3
+  %d4 = getelementptr inbounds half, ptr %d0, i64 4
+  %d5 = getelementptr inbounds half, ptr %d0, i64 5
+  %d6 = getelementptr inbounds half, ptr %d0, i64 6
+  %d7 = getelementptr inbounds half, ptr %d0, i64 7
+  %d8 = getelementptr inbounds half, ptr %d0, i64 8
+  %d9 = getelementptr inbounds half, ptr %d0, i64 9
+  %d10 = getelementptr inbounds half, ptr %d0, i64 10
+  %d11 = getelementptr inbounds half, ptr %d0, i64 11
+  %d12 = getelementptr inbounds half, ptr %d0, i64 12
+  %d13 = getelementptr inbounds half, ptr %d0, i64 13
+  %d14 = getelementptr inbounds half, ptr %d0, i64 14
+  %d15 = getelementptr inbounds half, ptr %d0, i64 15
+  store half %t0, ptr %d0, align 2
+  store half %t1, ptr %d1, align 2
+  store half %t2, ptr %d2, align 2
+  store half %t3, ptr %d3, align 2
+  store half %t4, ptr %d4, align 2
+  store half %t5, ptr %d5, align 2
+  store half %t6, ptr %d6, align 2
+  store half %t7, ptr %d7, align 2
+  store half %t8, ptr %d8, align 2
+  store half %t9, ptr %d9, align 2
+  store half %t10, ptr %d10, align 2
+  store half %t11, ptr %d11, align 2
+  store half %t12, ptr %d12, align 2
+  store half %t13, ptr %d13, align 2
+  store half %t14, ptr %d14, align 2
+  store half %t15, ptr %d15, align 2
+  ret void
+
+}
+
+; There is no instruction to round f64 to f16; this should not get vectorized!
+define void @fpround_v2xf64_v2xf16(ptr %s0, ptr %d0) {
+; CHECK-LABEL: define void @fpround_v2xf64_v2xf16(
+; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[S1:%.*]] = getelementptr inbounds double, ptr [[S0]], i64 1
+; CHECK-NEXT:    [[L0:%.*]] = load double, ptr [[S0]], align 4
+; CHECK-NEXT:    [[L1:%.*]] = load double, ptr [[S1]], align 4
+; CHECK-NEXT:    [[T0:%.*]] = fptrunc double [[L0]] to half
+; CHECK-NEXT:    [[T1:%.*]] = fptrunc double [[L1]] to half
+; CHECK-NEXT:    [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1
+; CHECK-NEXT:    store half [[T0]], ptr [[D0]], align 2
+; CHECK-NEXT:    store half [[T1]], ptr [[D1]], align 2
+; CHECK-NEXT:    ret void
+;
+; CHECK-F16C-LABEL: define void @fpround_v2xf64_v2xf16(
+; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-F16C-NEXT:    [[S1:%.*]] = getelementptr inbounds double, ptr [[S0]], i64 1
+; CHECK-F16C-NEXT:    [[L0:%.*]] = load double, ptr [[S0]], align 4
+; CHECK-F16C-NEXT:    [[L1:%.*]] = load double, ptr [[S1]], align 4
+; CHECK-F16C-NEXT:    [[T0:%.*]] = fptrunc double [[L0]] to half
+; CHECK-F16C-NEXT:    [[T1:%.*]] = fptrunc double [[L1]] to half
+; CHECK-F16C-NEXT:    [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1
+; CHECK-F16C-NEXT:    store half [[T0]], ptr [[D0]], align 2
+; CHECK-F16C-NEXT:    store half [[T1]], ptr [[D1]], align 2
+; CHECK-F16C-NEXT:    ret void
+;
+; CHECK-AVX512-LABEL: define void @fpround_v2xf64_v2xf16(
+; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
+; CHECK-AVX512-NEXT:    [[S1:%.*]] = getelementptr inbounds double, ptr [[S0]], i64 1
+; CHECK-AVX512-NEXT:    [[L0:%.*]] = load double, ptr [[S0]], align 4
+; CHECK-AVX512-NEXT:    [[L1:%.*]] = load double, ptr [[S1]], align 4
+; CHECK-AVX512-NEXT:    [[T0:%.*]] = fptrunc double [[L0]] to half
+; CHECK-AVX512-NEXT:    [[T1:%.*]] = fptrunc double [[L1]] to half
+; CHECK-AVX512-NEXT:    [[D1:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 1
+; CHECK-AVX512-NEXT:    store half [[T0]], ptr [[D0]], align 2
+; CHECK-AVX512-NEXT:    store half [[T1]], ptr [[D1]], align 2
+; CHECK-AVX512-NEXT:    ret void
+;
+  %s1 = getelementptr inbounds double, ptr %s0, i64 1
+  %l0 = load double, ptr %s0, align 4
+  %l1 = load double, ptr %s1, align 4
+
+  %t0 = fptrunc double %l0 to half
+  %t1 = fptrunc double %l1 to half
+
+  %d1 = getelementptr inbounds half, ptr %d0, i64 1
+  store half %t0, ptr %d0, align 2
+  store half %t1, ptr %d1, align 2
+  ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll
new file mode 100644
index 000000000000000..dbd91199c24ecd1
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/full-overlap-non-schedulable.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s
+
+define void @test(ptr %p1, ptr %0, i32 %1, i1 %c1, ptr %p2) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[TMP0:%.*]], i32 [[TMP1:%.*]], i1 [[C1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:  [[TOP:.*:]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x ptr> [[TMP3]], <4 x ptr> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, <4 x ptr> [[TMP4]], <4 x i64> <i64 8, i64 12, i64 16, i64 20>
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 2
+; CHECK-NEXT:    br i1 [[C1]], label %[[L42:.*]], label %[[L41:.*]]
+; CHECK:       [[L41]]:
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x ptr> [[TMP5]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> zeroinitializer, <4 x i32> [[TMP8]]
+; CHECK-NEXT:    br label %[[L112:.*]]
+; CHECK:       [[L42]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[DOTNOT280:%.*]] = icmp eq i32 [[TMP10]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP1]], i32 2
+; CHECK-NEXT:    br i1 [[DOTNOT280]], label %[[L112]], label %[[L47:.*]]
+; CHECK:       [[L47]]:
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x ptr> [[TMP5]], i32 1
+; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x ptr> [[TMP5]], <4 x ptr> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq <2 x ptr> [[TMP14]], zeroinitializer
+; CHECK-NEXT:    [[TMP16:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> zeroinitializer, <2 x i32> [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP13]], i32 1
+; CHECK-NEXT:    [[TMP19:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP18]], <2 x i32> [[TMP17]], i64 2)
+; CHECK-NEXT:    br label %[[L112]]
+; CHECK:       [[L112]]:
+; CHECK-NEXT:    [[TMP20:%.*]] = phi <4 x i32> [ [[TMP19]], %[[L47]] ], [ [[TMP9]], %[[L41]] ], [ [[TMP11]], %[[L42]] ]
+; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i32> [[TMP20]], i32 0
+; CHECK-NEXT:    store i32 [[TMP21]], ptr [[P2]], align 4
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x i32> [[TMP20]], i32 1
+; CHECK-NEXT:    store i32 [[TMP22]], ptr [[P1]], align 4
+; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i32> [[TMP20]], i32 2
+; CHECK-NEXT:    store i32 [[TMP23]], ptr [[P2]], align 4
+; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i32> [[TMP20]], i32 3
+; CHECK-NEXT:    store i32 [[TMP24]], ptr [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
+top:
+  %2 = getelementptr i8, ptr %0, i64 8
+  %3 = getelementptr i8, ptr %0, i64 12
+  %4 = getelementptr i8, ptr %0, i64 16
+  %5 = getelementptr i8, ptr %0, i64 20
+  br i1 %c1, label %L42, label %L41
+
+L41:
+  %.not276 = icmp eq ptr %2, null
+  %6 = load i32, ptr %2, align 4
+  %7 = select i1 %.not276, i32 0, i32 %6
+  %.not277 = icmp eq ptr %3, null
+  %8 = load i32, ptr %3, align 4
+  %9 = select i1 %.not277, i32 0, i32 %8
+  %.not278 = icmp eq ptr %4, null
+  %10 = load i32, ptr %4, align 4
+  %11 = select i1 %.not278, i32 0, i32 %10
+  %.not279 = icmp eq ptr %5, null
+  %12 = load i32, ptr %5, align 4
+  %13 = select i1 %.not279, i32 0, i32 %12
+  br label %L112
+
+L42:
+  %14 = load i32, ptr %2, align 4
+  %.not280 = icmp eq i32 %14, 0
+  br i1 %.not280, label %L112, label %L47
+
+L47:
+  %15 = load i32, ptr %3, align 4
+  %.not282 = icmp eq ptr %4, null
+  %16 = load i32, ptr %4, align 4
+  %17 = select i1 %.not282, i32 0, i32 %16
+  %.not283 = icmp eq ptr %5, null
+  %18 = load i32, ptr %5, align 4
+  %19 = select i1 %.not283, i32 0, i32 %18
+  br label %L112
+
+L112:
+  %value_phi13336 = phi i32 [ %19, %L47 ], [ %13, %L41 ], [ 0, %L42 ]
+  %value_phi12335 = phi i32 [ %17, %L47 ], [ %11, %L41 ], [ %1, %L42 ]
+  %value_phi11334 = phi i32 [ %15, %L47 ], [ %9, %L41 ], [ 0, %L42 ]
+  %value_phi10333 = phi i32 [ 0, %L47 ], [ %7, %L41 ], [ 0, %L42 ]
+  store i32 %value_phi10333, ptr %p2, align 4
+  store i32 %value_phi11334, ptr %p1, align 4
+  store i32 %value_phi12335, ptr %p2, align 4
+  store i32 %value_phi13336, ptr %p1, align 4
+  ret void
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll b/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll
index 9e4b33d4c4d5e55..fb342322b2da7af 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/2007-08-01-LCSSA.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt < %s -S -passes='loop(simple-loop-unswitch),instcombine<no-verify-fixpoint>' -verify-memoryssa | FileCheck %s
+; RUN: opt < %s -S -passes='loop(simple-loop-unswitch),instcombine' -verify-memoryssa | FileCheck %s
 
 ; We do not reach a fixpoint, because we first have to infer nsw on the IV add,
 ; and could eliminate the icmp slt afterwards, but don't revisit it.
@@ -8,9 +8,9 @@
 
 declare i32 @strcmp(ptr, ptr)
 
-define i32 @_ZN9Generator6strregEPKc(ptr %this, ptr %s) {
+define i32 @_ZN9Generator6strregEPKc(ptr %this, ptr %s) "instcombine-no-verify-fixpoint" {
 ; CHECK-LABEL: define i32 @_ZN9Generator6strregEPKc(
-; CHECK-SAME: ptr [[THIS:%.*]], ptr [[S:%.*]]) {
+; CHECK-SAME: ptr [[THIS:%.*]], ptr [[S:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP122:%.*]] = icmp eq ptr [[S]], null
 ; CHECK-NEXT:    br label [[BB184:%.*]]
diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll
index 4e4b81e89a32700..76cccbd6f39cc39 100644
--- a/llvm/test/Transforms/Util/add-TLI-mappings.ll
+++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll
@@ -14,10 +14,15 @@
 ; SVML-SAME:          ptr @__svml_log10f4,
 ; SVML-SAME:          ptr @__svml_log10f8,
 ; SVML-SAME:          ptr @__svml_log10f16
-; AMDLIBM-SAME:     [6 x ptr] [
+; AMDLIBM-SAME:     [11 x ptr] [
 ; AMDLIBM-SAME:       ptr @amd_vrd2_sin,
 ; AMDLIBM-SAME:       ptr @amd_vrd4_sin,
 ; AMDLIBM-SAME:       ptr @amd_vrd8_sin,
+; AMDLIBM-SAME:       ptr @amd_vrd4_sincos,
+; AMDLIBM-SAME:       ptr @amd_vrd8_sincos,
+; AMDLIBM-SAME:       ptr @amd_vrs4_sincosf,
+; AMDLIBM-SAME:       ptr @amd_vrs8_sincosf,
+; AMDLIBM-SAME:       ptr @amd_vrs16_sincosf,
 ; AMDLIBM-SAME:       ptr @amd_vrs4_log10f,
 ; AMDLIBM-SAME:       ptr @amd_vrs8_log10f,
 ; AMDLIBM-SAME:       ptr @amd_vrs16_log10f
@@ -106,6 +111,7 @@ define void @sincos_f64(double %in, ptr %sin, ptr %cos) {
 ; COMMON-LABEL: @sincos_f64(
 ; SLEEFGNUABI:  call void @sincos(double %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) #[[SINCOS:[0-9]+]]
 ; ARMPL:        call void @sincos(double %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) #[[SINCOS:[0-9]+]]
+; AMDLIBM:      call void @sincos(double %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) #[[SINCOS:[0-9]+]]
   call void @sincos(double %in, ptr %sin, ptr %cos)
   ret void
 }
@@ -116,6 +122,7 @@ define void @sincos_f32(float %in, ptr %sin, ptr %cos) {
 ; COMMON-LABEL: @sincos_f32(
 ; SLEEFGNUABI:  call void @sincosf(float %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) #[[SINCOSF:[0-9]+]]
 ; ARMPL:        call void @sincosf(float %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) #[[SINCOSF:[0-9]+]]
+; AMDLIBM:      call void @sincosf(float %{{.*}}, ptr %{{.*}}, ptr %{{.*}}) #[[SINCOSF:[0-9]+]]
   call void @sincosf(float %in, ptr %sin, ptr %cos)
   ret void
 }
@@ -145,7 +152,7 @@ declare void @sincospif(float, ptr, ptr) #0
 define float @call_llvm.log10.f32(float %in) {
 ; COMMON-LABEL: @call_llvm.log10.f32(
 ; SVML:         call float @llvm.log10.f32(float %{{.*}})
-; AMDLIBM:      call float @llvm.log10.f32(float %{{.*}})
+; AMDLIBM:      call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
 ; LIBMVEC-X86:  call float @llvm.log10.f32(float %{{.*}})
 ; MASSV:        call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
 ; ACCELERATE:   call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
@@ -171,6 +178,11 @@ declare float @llvm.log10.f32(float) #0
 ; AMDLIBM: declare <2 x double> @amd_vrd2_sin(<2 x double>)
 ; AMDLIBM: declare <4 x double> @amd_vrd4_sin(<4 x double>)
 ; AMDLIBM: declare <8 x double> @amd_vrd8_sin(<8 x double>)
+; AMDLIBM: declare void @amd_vrd4_sincos(<4 x double>, ptr, ptr)
+; AMDLIBM: declare void @amd_vrd8_sincos(<8 x double>, ptr, ptr)
+; AMDLIBM: declare void @amd_vrs4_sincosf(<4 x float>, ptr, ptr)
+; AMDLIBM: declare void @amd_vrs8_sincosf(<8 x float>, ptr, ptr)
+; AMDLIBM: declare void @amd_vrs16_sincosf(<16 x float>, ptr, ptr)
 ; AMDLIBM: declare <4 x float> @amd_vrs4_log10f(<4 x float>)
 ; AMDLIBM: declare <8 x float> @amd_vrs8_log10f(<8 x float>)
 ; AMDLIBM: declare <16 x float> @amd_vrs16_log10f(<16 x float>)
@@ -228,6 +240,17 @@ attributes #0 = { nounwind readnone }
 ; AMDLIBM-SAME:   "_ZGV_LLVM_N2v_sin(amd_vrd2_sin),
 ; AMDLIBM-SAME:   _ZGV_LLVM_N4v_sin(amd_vrd4_sin),
 ; AMDLIBM-SAME:   _ZGV_LLVM_N8v_sin(amd_vrd8_sin)" }
+; AMDLIBM:      attributes #[[SINCOS]] = { "vector-function-abi-variant"=
+; AMDLIBM-SAME:   "_ZGV_LLVM_N4vl8l8_sincos(amd_vrd4_sincos),
+; AMDLIBM-SAME:   _ZGV_LLVM_N8vl8l8_sincos(amd_vrd8_sincos)" }
+; AMDLIBM:      attributes #[[SINCOSF]] = { "vector-function-abi-variant"=
+; AMDLIBM-SAME:   "_ZGV_LLVM_N4vl4l4_sincosf(amd_vrs4_sincosf),
+; AMDLIBM-SAME:   _ZGV_LLVM_N8vl4l4_sincosf(amd_vrs8_sincosf),
+; AMDLIBM-SAME:   _ZGV_LLVM_N16vl4l4_sincosf(amd_vrs16_sincosf)" }
+; AMDLIBM:      attributes #[[LOG10]] = { "vector-function-abi-variant"=
+; AMDLIBM-SAME:   "_ZGV_LLVM_N4v_llvm.log10.f32(amd_vrs4_log10f),
+; AMDLIBM-SAME:   _ZGV_LLVM_N8v_llvm.log10.f32(amd_vrs8_log10f),
+; AMDLIBM-SAME:   _ZGV_LLVM_N16v_llvm.log10.f32(amd_vrs16_log10f)" }
 
 ; MASSV:      attributes #[[SIN]] = { "vector-function-abi-variant"=
 ; MASSV-SAME:   "_ZGV_LLVM_N2v_sin(__sind2)" }
diff --git a/llvm/test/Verifier/rtsan-attrs.ll b/llvm/test/Verifier/rtsan-attrs.ll
index fcc44d8d63c1deb..c813266b434f8ce 100644
--- a/llvm/test/Verifier/rtsan-attrs.ll
+++ b/llvm/test/Verifier/rtsan-attrs.ll
@@ -1,9 +1,9 @@
 ; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
 
-; CHECK: Attributes 'sanitize_realtime and sanitize_realtime_unsafe' are incompatible!
+; CHECK: Attributes 'sanitize_realtime and sanitize_realtime_blocking' are incompatible!
 ; CHECK-NEXT: ptr @sanitize_unsafe
 define void @sanitize_unsafe() #0 {
   ret void
 }
 
-attributes #0 = { sanitize_realtime sanitize_realtime_unsafe }
+attributes #0 = { sanitize_realtime sanitize_realtime_blocking }
diff --git a/llvm/test/Verifier/scalable-global-vars.ll b/llvm/test/Verifier/scalable-global-vars.ll
index 81882261e664ef6..fb9a3067acba982 100644
--- a/llvm/test/Verifier/scalable-global-vars.ll
+++ b/llvm/test/Verifier/scalable-global-vars.ll
@@ -15,3 +15,17 @@
 ; CHECK-NEXT: ptr @ScalableVecStructGlobal
 @ScalableVecStructGlobal = global { i32,  <vscale x 4 x i32> } zeroinitializer
 
+; CHECK-NEXT: Globals cannot contain scalable types
+; CHECK-NEXT: ptr @StructTestGlobal
+%struct.test = type { <vscale x 1 x double>, <vscale x 1 x double> }
+@StructTestGlobal = global %struct.test zeroinitializer
+
+; CHECK-NEXT: Globals cannot contain scalable types
+; CHECK-NEXT: ptr @StructArrayTestGlobal
+%struct.array.test = type { [2 x <vscale x 1 x double>] }
+@StructArrayTestGlobal = global %struct.array.test zeroinitializer
+
+; CHECK-NEXT: Globals cannot contain scalable types
+; CHECK-NEXT: ptr @StructTargetTestGlobal
+%struct.target.test = type { target("aarch64.svcount"), target("aarch64.svcount") }
+@StructTargetTestGlobal = global %struct.target.test zeroinitializer
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm.s.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm.s.expected
index 7336947a3f57a06..2dc30cd112e4658 100644
--- a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm.s.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm.s.expected
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py
 // RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s
 
 v_bfrev_b32 v5, v1
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s.expected
index 0a0ad51d15e056d..ca287fc2d632098 100644
--- a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s.expected
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py
 // RUN: not llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s
 
 v_bfrev_b32 v5, v299
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s
new file mode 100644
index 000000000000000..ea03c5a6911fadc
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s
@@ -0,0 +1,5 @@
+// RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s
+
+v_bfrev_b32 v5, v1
+
+v_bfrev_b32 v1, v1
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s.expected
new file mode 100644
index 000000000000000..57f72ed406fb407
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort.s.expected
@@ -0,0 +1,8 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort
+// RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s
+
+v_bfrev_b32 v1, v1
+// CHECK: v_bfrev_b32_e32 v1, v1                  ; encoding: [0x01,0x71,0x02,0x7e]
+
+v_bfrev_b32 v5, v1
+// CHECK: v_bfrev_b32_e32 v5, v1                  ; encoding: [0x01,0x71,0x0a,0x7e]
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s
new file mode 100644
index 000000000000000..d60b3bda29eda2d
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s
@@ -0,0 +1,9 @@
+// RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s
+
+v_bfrev_b32 v5, v1 //This is comment A
+
+v_bfrev_b32 v1, v1
+// This is comment B
+
+// This is comment C
+v_bfrev_b32 v2, v1
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s.expected
new file mode 100644
index 000000000000000..6924880032717ea
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_sort_with_comment.s.expected
@@ -0,0 +1,13 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort
+// RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s
+
+v_bfrev_b32 v1, v1
+// CHECK: v_bfrev_b32_e32 v1, v1                  ; encoding: [0x01,0x71,0x02,0x7e]
+// This is comment B
+
+// This is comment C
+v_bfrev_b32 v2, v1
+// CHECK: v_bfrev_b32_e32 v2, v1                  ; encoding: [0x01,0x71,0x04,0x7e]
+
+v_bfrev_b32 v5, v1 //This is comment A
+// CHECK: v_bfrev_b32_e32 v5, v1                  ; encoding: [0x01,0x71,0x0a,0x7e]
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s
new file mode 100644
index 000000000000000..63240174cdde55f
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s
@@ -0,0 +1,10 @@
+// RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s
+
+//this is commentA
+v_bfrev_b32 v5, v1
+
+v_bfrev_b32 v5, v1
+
+//this is commentB
+
+//this is commentB
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s.expected
new file mode 100644
index 000000000000000..8203b90040ba4ed
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_unique.s.expected
@@ -0,0 +1,10 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique
+// RUN: llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s
+
+//this is commentA
+v_bfrev_b32 v5, v1
+// CHECK: v_bfrev_b32_e32 v5, v1                  ; encoding: [0x01,0x71,0x0a,0x7e]
+
+//this is commentB
+
+//this is commentB
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm.txt.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm.txt.expected
index a6f7abcb1774ac0..b3cbaff6d1c7ef8 100644
--- a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm.txt.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm.txt.expected
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py
+# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding %s 2>&1 | FileCheck -check-prefixes=CHECK %s
 
 0x00,0x00,0x00,0x7e
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt
new file mode 100644
index 000000000000000..3d0d49ddeea4256
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt
@@ -0,0 +1,5 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding %s 2>&1 | FileCheck -check-prefixes=CHECK %s
+
+0x00,0x00,0x00,0x7e
+
+0x00,0x00,0x00,0x7e
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt.expected
new file mode 100644
index 000000000000000..32bddb20628dad1
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_dasm_unique.txt.expected
@@ -0,0 +1,5 @@
+# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding %s 2>&1 | FileCheck -check-prefixes=CHECK %s
+
+0x00,0x00,0x00,0x7e
+# CHECK: v_nop                                   ; encoding: [0x00,0x00,0x00,0x7e]
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_multirun_dasm.txt.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_multirun_dasm.txt.expected
index 03a5ec3c559dbfc..7b6b832801625b4 100644
--- a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_multirun_dasm.txt.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_multirun_dasm.txt.expected
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py
+# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py
 # RUN: llvm-mc -triple=amdgcn -mcpu=tonga -disassemble -show-encoding %s 2>&1 | FileCheck -check-prefixes=CHECK,CHECKA %s
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding %s 2>&1 | FileCheck -check-prefixes=CHECK,CHECKB %s
 
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-sort.test b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-sort.test
new file mode 100644
index 000000000000000..f8972ffabf0999c
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-sort.test
@@ -0,0 +1,7 @@
+# REQUIRES: amdgpu-registered-target
+## Check that sort is working
+
+# RUN: cp -f %S/Inputs/amdgpu_asm_sort.s %t.s && %update_mc_test_checks --sort %t.s
+# RUN: diff -u %S/Inputs/amdgpu_asm_sort.s.expected %t.s
+# RUN: cp -f %S/Inputs/amdgpu_asm_sort_with_comment.s %t.s && %update_mc_test_checks --sort %t.s
+# RUN: diff -u %S/Inputs/amdgpu_asm_sort_with_comment.s.expected %t.s
diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-unique.test b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-unique.test
new file mode 100644
index 000000000000000..8a5d83462cad73d
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/amdgpu-unique.test
@@ -0,0 +1,7 @@
+# REQUIRES: amdgpu-registered-target
+## Check that unique is working
+
+# RUN: cp -f %S/Inputs/amdgpu_asm_unique.s %t.s && %update_mc_test_checks --unique %t.s
+# RUN: diff -u %S/Inputs/amdgpu_asm_unique.s.expected %t.s
+# RUN: cp -f %S/Inputs/amdgpu_dasm_unique.txt %t.txt && %update_mc_test_checks --unique %t.txt
+# RUN: diff -u %S/Inputs/amdgpu_dasm_unique.txt.expected %t.txt
diff --git a/llvm/test/tools/llvm-objdump/MachO/AArch64/aliases.s b/llvm/test/tools/llvm-objdump/MachO/AArch64/aliases.s
new file mode 100644
index 000000000000000..d8b71348a8e40fc
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/MachO/AArch64/aliases.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -filetype=obj -triple=arm64-apple-darwin %s -o %t
+# RUN: llvm-objdump --macho -d -M no-aliases %t | FileCheck %s
+# RUN: llvm-objdump --macho -d --disassembler-options=no-aliases %t | FileCheck %s
+
+# CHECK: orr w1, wzr, w2
+
+# RUN: llvm-objdump --macho -d %t | FileCheck %s --check-prefix=ALIAS
+
+# ALIAS: mov w1, w2
+
+# RUN: not llvm-objdump --macho -d -M unknown %t 2>&1 | FileCheck %s -DFILE=%t --check-prefix=ERR
+
+# ERR: error: '[[FILE]]': unrecognized disassembler option: unknown
+
+mov w1, w2
diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-color.s b/llvm/test/tools/llvm-objdump/X86/disassemble-color.s
new file mode 100644
index 000000000000000..4e1d82562fb546a
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/X86/disassemble-color.s
@@ -0,0 +1,21 @@
+# UNSUPPORTED: system-windows
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t
+# RUN: llvm-objdump -d --no-show-raw-insn --disassembler-color=on %t | FileCheck %s --check-prefix=ATT
+# RUN: llvm-objdump -d --no-show-raw-insn --disassembler-color=on -M intel %t | FileCheck %s --check-prefix=INTEL
+
+# ATT:      <.text>:
+# ATT-NEXT:  leaq	[0;32m([0;36m%rdx[0;32m,[0;36m%rax[0;32m,[0;31m4[0;32m)[0m, [0;36m%rbx[0m
+# ATT-NEXT:  movq	[0;32m(,[0;36m%rax[0;32m)[0m, [0;36m%rbx[0m
+# ATT-NEXT:  leaq	[0;32m0x3([0;36m%rdx[0;32m,[0;36m%rax[0;32m)[0m, [0;36m%rbx[0m
+# ATT-NEXT:  movq	[0;31m$0x3[0m, [0;36m%rax[0m
+
+# INTEL:      <.text>:
+# INTEL-NEXT:  lea	[0;36mrbx[0m, [0;32m[[0;36mrdx[0;32m + 4*[0;36mrax[0;32m][0m
+# INTEL-NEXT:  mov	[0;36mrbx[0m, qword ptr [0;32m[1*[0;36mrax[0;32m][0m
+# INTEL-NEXT:  lea	[0;36mrbx[0m, [0;32m[[0;36mrdx[0;32m + [0;36mrax[0;32m + [0;31m0x3[0;32m][0m
+# INTEL-NEXT:  mov	[0;36mrax[0m, [0;31m0x3[0m
+
+leaq (%rdx,%rax,4), %rbx
+movq (,%rax), %rbx
+leaq 3(%rdx,%rax), %rbx
+movq $3, %rax
diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
index 20e7e15e3efb551..408b9c39934286f 100644
--- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
+++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
@@ -34,7 +34,7 @@
 #
 # CHECK: << Total TLI yes SDK no:  18
 # CHECK: >> Total TLI no  SDK yes: 0
-# CHECK: == Total TLI yes SDK yes: 265
+# CHECK: == Total TLI yes SDK yes: 268
 #
 # WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*)
 # WRONG_DETAIL: >> TLI no  SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int)
@@ -48,14 +48,14 @@
 # WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numl'
 # WRONG_SUMMARY: << Total TLI yes SDK no:  19{{$}}
 # WRONG_SUMMARY: >> Total TLI no  SDK yes: 1{{$}}
-# WRONG_SUMMARY: == Total TLI yes SDK yes: 264
+# WRONG_SUMMARY: == Total TLI yes SDK yes: 267
 #
 ## The -COUNT suffix doesn't care if there are too many matches, so check
 ## the exact count first; the two directives should add up to that.
 ## Yes, this means additions to TLI will fail this test, but the argument
 ## to -COUNT can't be an expression.
-# AVAIL: TLI knows 516 symbols, 283 available
-# AVAIL-COUNT-283: {{^}} available
+# AVAIL: TLI knows 519 symbols, 286 available
+# AVAIL-COUNT-286: {{^}} available
 # AVAIL-NOT:       {{^}} available
 # UNAVAIL-COUNT-233: not available
 # UNAVAIL-NOT:       not available
@@ -390,6 +390,18 @@ DynamicSymbols:
     Type:            STT_FUNC
     Section:         .text
     Binding:         STB_GLOBAL
+  - Name:            tgamma
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+  - Name:            tgammaf
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
+  - Name:            tgammal
+    Type:            STT_FUNC
+    Section:         .text
+    Binding:         STB_GLOBAL
   - Name:            exp
     Type:            STT_FUNC
     Section:         .text
diff --git a/llvm/tools/llvm-c-test/debuginfo.c b/llvm/tools/llvm-c-test/debuginfo.c
index 942cbe5e5900e41..baf4ddfcc9a37bf 100644
--- a/llvm/tools/llvm-c-test/debuginfo.c
+++ b/llvm/tools/llvm-c-test/debuginfo.c
@@ -163,6 +163,11 @@ int llvm_test_dibuilder(void) {
 
   LLVMSetSubprogram(FooFunction, FunctionMetadata);
 
+  LLVMMetadataRef FooLabel1 = LLVMDIBuilderCreateLabel(DIB, FunctionMetadata,
+    "label1", 6, File, 42, false);
+  LLVMDIBuilderInsertLabelAtEnd(DIB, FooLabel1, FooParamLocation,
+    FooEntryBlock);
+
   LLVMMetadataRef FooLexicalBlock =
     LLVMDIBuilderCreateLexicalBlock(DIB, FunctionMetadata, File, 42, 0);
 
@@ -210,8 +215,6 @@ int llvm_test_dibuilder(void) {
   LLVMAddNamedMetadataOperand(
       M, "EnumTest", LLVMMetadataAsValue(LLVMGetModuleContext(M), EnumTest));
 
-  LLVMDIBuilderFinalize(DIB);
-
   // Using the new debug format, debug records get attached to instructions.
   // Insert a `br` and `ret` now to absorb the debug records which are
   // currently "trailing", meaning that they're associated with a block
@@ -221,6 +224,20 @@ int llvm_test_dibuilder(void) {
   LLVMPositionBuilderAtEnd(Builder, FooEntryBlock);
   // Build `br label %vars` in entry.
   LLVMBuildBr(Builder, FooVarBlock);
+
+  // Build another br for the sake of testing labels.
+  LLVMMetadataRef FooLabel2 = LLVMDIBuilderCreateLabel(DIB, FunctionMetadata,
+    "label2", 6, File, 42, false);
+  LLVMDIBuilderInsertLabelBefore(DIB, FooLabel2, FooParamLocation,
+    LLVMBuildBr(Builder, FooVarBlock));
+  // label3 will be emitted, but label4 won't be emitted
+  // because label3 is AlwaysPreserve and label4 is not.
+  LLVMDIBuilderCreateLabel(DIB, FunctionMetadata,
+    "label3", 6, File, 42, true);
+  LLVMDIBuilderCreateLabel(DIB, FunctionMetadata,
+    "label4", 6, File, 42, false);
+  LLVMDIBuilderFinalize(DIB);
+
   // Build `ret i64 0` in vars.
   LLVMPositionBuilderAtEnd(Builder, FooVarBlock);
   LLVMTypeRef I64 = LLVMInt64TypeInContext(Ctx);
diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index b8afb560d2ae9c2..ab6f65cd41a3657 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -7330,6 +7330,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
   // comment causing different diffs with the 'C' disassembler library API.
   // IP->setCommentStream(CommentStream);
 
+  for (StringRef Opt : DisassemblerOptions)
+    if (!IP->applyTargetSpecificCLOption(Opt))
+      reportError(Filename, "unrecognized disassembler option: " + Opt);
+
   // Set up separate thumb disassembler if needed.
   std::unique_ptr<const MCRegisterInfo> ThumbMRI;
   std::unique_ptr<const MCAsmInfo> ThumbAsmInfo;
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 8073c898b8a147d..86ba9193dff2d14 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -305,11 +305,11 @@ bool objdump::ArchiveHeaders;
 bool objdump::Demangle;
 bool objdump::Disassemble;
 bool objdump::DisassembleAll;
+std::vector<std::string> objdump::DisassemblerOptions;
 bool objdump::SymbolDescription;
 bool objdump::TracebackTable;
 static std::vector<std::string> DisassembleSymbols;
 static bool DisassembleZeroes;
-static std::vector<std::string> DisassemblerOptions;
 static ColorOutput DisassemblyColor;
 DIDumpType objdump::DwarfDumpType;
 static bool DynamicRelocations;
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.h b/llvm/tools/llvm-objdump/llvm-objdump.h
index 7778cf6c2784eb7..debaedd33429d02 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.h
+++ b/llvm/tools/llvm-objdump/llvm-objdump.h
@@ -50,6 +50,7 @@ extern DebugVarsFormat DbgVariables;
 extern bool Demangle;
 extern bool Disassemble;
 extern bool DisassembleAll;
+extern std::vector<std::string> DisassemblerOptions;
 extern DIDumpType DwarfDumpType;
 extern std::vector<std::string> FilterSections;
 extern bool LeadingAddr;
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 59f0f1f1fae8992..f7023aa966adf6f 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -342,6 +342,15 @@ cl::opt<bool> MemProfFullSchema(
     "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand),
     cl::desc("Use the full schema for serialization"), cl::init(false));
 
+static cl::opt<bool>
+    MemprofGenerateRandomHotness("memprof-random-hotness", cl::init(false),
+                                 cl::Hidden, cl::sub(MergeSubcommand),
+                                 cl::desc("Generate random hotness values"));
+static cl::opt<unsigned> MemprofGenerateRandomHotnessSeed(
+    "memprof-random-hotness-seed", cl::init(0), cl::Hidden,
+    cl::sub(MergeSubcommand),
+    cl::desc("Random hotness seed to use (0 to generate new seed)"));
+
 // Options specific to overlap subcommand.
 cl::opt<std::string> BaseFilename(cl::Positional, cl::Required,
                                   cl::desc("<base profile file>"),
@@ -641,7 +650,8 @@ struct WriterContext {
                 SmallSet<instrprof_error, 4> &WriterErrorCodes,
                 uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
       : Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion,
-               MemProfVersionRequested, MemProfFullSchema),
+               MemProfVersionRequested, MemProfFullSchema,
+               MemprofGenerateRandomHotness, MemprofGenerateRandomHotnessSeed),
         ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
 };
 
diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt
index 745e4d9fb74a4a6..b0077d5b54a3ee5 100644
--- a/llvm/unittests/ADT/CMakeLists.txt
+++ b/llvm/unittests/ADT/CMakeLists.txt
@@ -86,6 +86,7 @@ add_llvm_unittest(ADTTests
   StringSetTest.cpp
   StringSwitchTest.cpp
   TinyPtrVectorTest.cpp
+  TrieRawHashMapTest.cpp
   TwineTest.cpp
   TypeSwitchTest.cpp
   TypeTraitsTest.cpp
diff --git a/llvm/unittests/ADT/TrieRawHashMapTest.cpp b/llvm/unittests/ADT/TrieRawHashMapTest.cpp
new file mode 100644
index 000000000000000..c9081f547812e90
--- /dev/null
+++ b/llvm/unittests/ADT/TrieRawHashMapTest.cpp
@@ -0,0 +1,346 @@
+//===- TrieRawHashMapTest.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/TrieRawHashMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/SHA1.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace llvm {
+class TrieRawHashMapTestHelper {
+public:
+  TrieRawHashMapTestHelper() = default;
+
+  void setTrie(ThreadSafeTrieRawHashMapBase *T) { Trie = T; }
+
+  ThreadSafeTrieRawHashMapBase::PointerBase getRoot() const {
+    return Trie->getRoot();
+  }
+  unsigned getStartBit(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
+    return Trie->getStartBit(P);
+  }
+  unsigned getNumBits(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
+    return Trie->getNumBits(P);
+  }
+  unsigned getNumSlotUsed(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
+    return Trie->getNumSlotUsed(P);
+  }
+  unsigned getNumTries() const { return Trie->getNumTries(); }
+  std::string
+  getTriePrefixAsString(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
+    return Trie->getTriePrefixAsString(P);
+  }
+  ThreadSafeTrieRawHashMapBase::PointerBase
+  getNextTrie(ThreadSafeTrieRawHashMapBase::PointerBase P) const {
+    return Trie->getNextTrie(P);
+  }
+
+private:
+  ThreadSafeTrieRawHashMapBase *Trie = nullptr;
+};
+} // namespace llvm
+
+namespace {
+template <typename DataType, size_t HashSize = sizeof(uint64_t)>
+class SimpleTrieHashMapTest : public TrieRawHashMapTestHelper,
+                              public ::testing::Test {
+public:
+  using NumType = DataType;
+  using HashType = std::array<uint8_t, HashSize>;
+  using TrieType = ThreadSafeTrieRawHashMap<DataType, sizeof(HashType)>;
+
+  TrieType &createTrie(size_t RootBits, size_t SubtrieBits) {
+    auto &Ret = Trie.emplace(RootBits, SubtrieBits);
+    TrieRawHashMapTestHelper::setTrie(&Ret);
+    return Ret;
+  }
+
+  void destroyTrie() { Trie.reset(); }
+  ~SimpleTrieHashMapTest() { destroyTrie(); }
+
+  // Use the number itself as hash to test the pathological case.
+  static HashType hash(uint64_t Num) {
+    uint64_t HashN =
+        llvm::support::endian::byte_swap(Num, llvm::endianness::big);
+    HashType Hash;
+    memcpy(&Hash[0], &HashN, sizeof(HashType));
+    return Hash;
+  };
+
+private:
+  std::optional<TrieType> Trie;
+};
+
+using SmallNodeTrieTest = SimpleTrieHashMapTest<uint64_t>;
+
+TEST_F(SmallNodeTrieTest, TrieAllocation) {
+  NumType Numbers[] = {
+      0x0, std::numeric_limits<NumType>::max(),      0x1, 0x2,
+      0x3, std::numeric_limits<NumType>::max() - 1u,
+  };
+
+  unsigned ExpectedTries[] = {
+      1,       // Allocate Root.
+      1,       // Both on the root.
+      64,      // 0 and 1 sink all the way down.
+      64,      // no new allocation needed.
+      65,      // need a new node between 2 and 3.
+      65 + 63, // 63 new allocations to sink two big numbers all the way.
+  };
+
+  const char *ExpectedPrefix[] = {
+      "", // Root.
+      "", // Root.
+      "00000000000000[0000000]",
+      "00000000000000[0000000]",
+      "00000000000000[0000001]",
+      "ffffffffffffff[1111111]",
+  };
+
+  // Use root and subtrie sizes of 1 so this gets sunk quite deep.
+  auto &Trie = createTrie(/*RootBits=*/1, /*SubtrieBits=*/1);
+
+  for (unsigned I = 0; I < 6; ++I) {
+    // Lookup first to exercise hint code for deep tries.
+    TrieType::pointer Lookup = Trie.find(hash(Numbers[I]));
+    EXPECT_FALSE(Lookup);
+
+    Trie.insert(Lookup, TrieType::value_type(hash(Numbers[I]), Numbers[I]));
+    EXPECT_EQ(getNumTries(), ExpectedTries[I]);
+    EXPECT_EQ(getTriePrefixAsString(getNextTrie(getRoot())), ExpectedPrefix[I]);
+  }
+}
+
+TEST_F(SmallNodeTrieTest, TrieStructure) {
+  NumType Numbers[] = {
+      // Three numbers that will nest deeply to test (1) sinking subtries and
+      // (2) deep, non-trivial hints.
+      std::numeric_limits<NumType>::max(),
+      std::numeric_limits<NumType>::max() - 2u,
+      std::numeric_limits<NumType>::max() - 3u,
+      // One number to stay at the top-level.
+      0x37,
+  };
+
+  // Use root and subtrie sizes of 1 so this gets sunk quite deep.
+  auto &Trie = createTrie(/*RootBits=*/1, /*SubtrieBits=*/1);
+
+  for (NumType N : Numbers) {
+    // Lookup first to exercise hint code for deep tries.
+    TrieType::pointer Lookup = Trie.find(hash(N));
+    EXPECT_FALSE(Lookup);
+
+    Trie.insert(Lookup, TrieType::value_type(hash(N), N));
+  }
+  for (NumType N : Numbers) {
+    TrieType::pointer Lookup = Trie.find(hash(N));
+    EXPECT_TRUE(Lookup);
+    if (!Lookup)
+      continue;
+    EXPECT_EQ(hash(N), Lookup->Hash);
+    EXPECT_EQ(N, Lookup->Data);
+
+    // Confirm a subsequent insertion fails to overwrite by trying to insert a
+    // bad value.
+    auto Result = Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1));
+    EXPECT_EQ(N, Result->Data);
+  }
+
+  // Check the trie so we can confirm the structure is correct. Each subtrie
+  // should have 2 slots. The root's index=0 should have the content for
+  // 0x37 directly, and index=1 should be a linked-list of subtries, finally
+  // ending with content for (max-2) and (max-3).
+  //
+  // Note: This structure is not exhaustive (too expensive to update tests),
+  // but it does test that the dump format is somewhat readable and that the
+  // basic structure is correct.
+  //
+  // Note: This test requires that the trie reads bytes starting from index 0
+  // of the array of uint8_t, and then reads each byte's bits from high to low.
+
+  // Check the Trie.
+  // We should have allocated a total of 64 SubTries for a 64-bit hash.
+  ASSERT_EQ(getNumTries(), 64u);
+  // Check the root trie. Two slots and both are used.
+  ASSERT_EQ(getNumSlotUsed(getRoot()), 2u);
+  // Check last subtrie.
+  // Last allocated trie is the next node in the allocation chain.
+  auto LastAlloctedSubTrie = getNextTrie(getRoot());
+  ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie),
+            "ffffffffffffff[1111110]");
+  ASSERT_EQ(getStartBit(LastAlloctedSubTrie), 63u);
+  ASSERT_EQ(getNumBits(LastAlloctedSubTrie), 1u);
+  ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie), 2u);
+}
+
+TEST_F(SmallNodeTrieTest, TrieStructureSmallFinalSubtrie) {
+  NumType Numbers[] = {
+      // Three numbers that will nest deeply to test (1) sinking subtries and
+      // (2) deep, non-trivial hints.
+      std::numeric_limits<NumType>::max(),
+      std::numeric_limits<NumType>::max() - 2u,
+      std::numeric_limits<NumType>::max() - 3u,
+      // One number to stay at the top-level.
+      0x37,
+  };
+
+  // Use subtrie size of 5 to avoid hitting 64 evenly, making the final subtrie
+  // small.
+  auto &Trie = createTrie(/*RootBits=*/8, /*SubtrieBits=*/5);
+
+  for (NumType N : Numbers) {
+    // Lookup first to exercise hint code for deep tries.
+    TrieType::pointer Lookup = Trie.find(hash(N));
+    EXPECT_FALSE(Lookup);
+
+    Trie.insert(Lookup, TrieType::value_type(hash(N), N));
+  }
+  for (NumType N : Numbers) {
+    TrieType::pointer Lookup = Trie.find(hash(N));
+    ASSERT_TRUE(Lookup);
+    EXPECT_EQ(hash(N), Lookup->Hash);
+    EXPECT_EQ(N, Lookup->Data);
+
+    // Confirm a subsequent insertion fails to overwrite by trying to insert a
+    // bad value.
+    auto Result = Trie.insert(Lookup, TrieType::value_type(hash(N), N - 1));
+    EXPECT_EQ(N, Result->Data);
+  }
+
+  // Check the trie so we can confirm the structure is correct. The root
+  // should have 2^8=256 slots, most subtries should have 2^5=32 slots, and the
+  // deepest subtrie should have 2^1=2 slots (since (64-8)mod(5)=1).
+  // The root slot for 0x37's leading hash bits should have the content for
+  // 0x37 directly, and the slot for the all-ones prefix should be a chain of
+  // subtries, finally ending with content for (max-2) and (max-3).
+  //
+  // Note: This structure is not exhaustive (too expensive to update tests),
+  // but it does test that the dump format is somewhat readable and that the
+  // basic structure is correct.
+  //
+  // Note: This test requires that the trie reads bytes starting from index 0
+  // of the array of uint8_t, and then reads each byte's bits from high to low.
+
+  // Check the Trie.
+  // 64 bit hash = 8 + 5 * 11 + 1, so 1 root, 11 5-bit subtries and 1 last
+  // level subtrie, 13 total.
+  ASSERT_EQ(getNumTries(), 13u);
+  // Check the root trie. It has 2^8=256 slots, of which two are used.
+  ASSERT_EQ(getNumSlotUsed(getRoot()), 2u);
+  // Check last subtrie.
+  // Last allocated trie is the next node in the allocation chain.
+  auto LastAlloctedSubTrie = getNextTrie(getRoot());
+  ASSERT_EQ(getTriePrefixAsString(LastAlloctedSubTrie),
+            "ffffffffffffff[1111110]");
+  ASSERT_EQ(getStartBit(LastAlloctedSubTrie), 63u);
+  ASSERT_EQ(getNumBits(LastAlloctedSubTrie), 1u);
+  ASSERT_EQ(getNumSlotUsed(LastAlloctedSubTrie), 2u);
+}
+
+TEST_F(SmallNodeTrieTest, TrieDestructionLoop) {
+  // Test destroying large Trie. Make sure there is no recursion that can
+  // overflow the stack.
+
+  // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate.
+  auto &Trie = createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1);
+
+  // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug
+  // builds.
+  static constexpr uint64_t MaxN = 100000;
+  for (uint64_t N = 0; N != MaxN; ++N) {
+    HashType Hash = hash(N);
+    Trie.insert(TrieType::pointer(), TrieType::value_type(Hash, NumType{N}));
+  }
+
+  // Destroy the trie. If destruction is recursive and MaxN is high enough,
+  // this will overflow the stack.
+  destroyTrie();
+}
+
+struct NumWithDestructorT {
+  uint64_t Num;
+  llvm::function_ref<void()> DestructorCallback;
+  ~NumWithDestructorT() { DestructorCallback(); }
+};
+
+using NodeWithDestructorTrieTest = SimpleTrieHashMapTest<NumWithDestructorT>;
+
+TEST_F(NodeWithDestructorTrieTest, TrieDestructionLoop) {
+  // Test destroying large Trie. Make sure there is no recursion that can
+  // overflow the stack.
+
+  // Limit the tries to 2 slots (1 bit) to generate subtries at a higher rate.
+  auto &Trie = createTrie(/*NumRootBits=*/1, /*NumSubtrieBits=*/1);
+
+  // Fill them up. Pick a MaxN high enough to cause a stack overflow in debug
+  // builds.
+  static constexpr uint64_t MaxN = 100000;
+
+  uint64_t DestructorCalled = 0;
+  auto DtorCallback = [&DestructorCalled]() { ++DestructorCalled; };
+  for (uint64_t N = 0; N != MaxN; ++N) {
+    HashType Hash = hash(N);
+    Trie.insert(TrieType::pointer(),
+                TrieType::value_type(Hash, NumType{N, DtorCallback}));
+  }
+  // Reset the count after all the temporaries get destroyed.
+  DestructorCalled = 0;
+
+  // Destroy the trie. If destruction is recursive and MaxN is high enough,
+  // this will overflow the stack.
+  destroyTrie();
+
+  // Count the number of destructor calls during `destroyTrie()`.
+  ASSERT_EQ(DestructorCalled, MaxN);
+}
+
+using NumStrNodeTrieTest = SimpleTrieHashMapTest<std::string>;
+
+TEST_F(NumStrNodeTrieTest, TrieInsertLazy) {
+  for (unsigned RootBits : {2, 3, 6, 10}) {
+    for (unsigned SubtrieBits : {2, 3, 4}) {
+      auto &Trie = createTrie(RootBits, SubtrieBits);
+      for (int I = 0, E = 1000; I != E; ++I) {
+        TrieType::pointer Lookup;
+        HashType H = hash(I);
+        if (I & 1)
+          Lookup = Trie.find(H);
+
+        auto insertNum = [&](uint64_t Num) {
+          std::string S = Twine(I).str();
+          auto Hash = hash(Num);
+          return Trie.insertLazy(
+              Hash, [&](TrieType::LazyValueConstructor C) { C(std::move(S)); });
+        };
+        auto S1 = insertNum(I);
+        // The address of the Data should be the same.
+        EXPECT_EQ(&S1->Data, &insertNum(I)->Data);
+
+        auto insertStr = [&](std::string S) {
+          int Num = std::stoi(S);
+          return insertNum(Num);
+        };
+        std::string S2 = S1->Data;
+        // The address of the Data should be the same.
+        EXPECT_EQ(&S1->Data, &insertStr(S2)->Data);
+      }
+      for (int I = 0, E = 1000; I != E; ++I) {
+        std::string S = Twine(I).str();
+        TrieType::pointer Lookup = Trie.find(hash(I));
+        EXPECT_TRUE(Lookup);
+        if (!Lookup)
+          continue;
+        EXPECT_EQ(S, Lookup->Data);
+      }
+    }
+  }
+}
+} // end anonymous namespace
diff --git a/llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt b/llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt
index 22cd0cb1d9a034d..deabf110f2e4399 100644
--- a/llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt
+++ b/llvm/unittests/Analysis/InlineAdvisorPlugin/CMakeLists.txt
@@ -2,7 +2,7 @@
 # libraries, but expects them to exist in the process loading the plugin. This 
 # doesn't work with DLLs on Windows (where a shared library can't have undefined
 # references), so just skip this testcase on Windows.
-if (NOT WIN32 AND NOT CYGWIN)
+if ((NOT WIN32 OR LLVM_BUILD_LLVM_DYLIB) AND NOT CYGWIN)
   unset(LLVM_LINK_COMPONENTS)
   add_llvm_library(InlineAdvisorPlugin MODULE BUILDTREE_ONLY
     InlineAdvisorPlugin.cpp
diff --git a/llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt b/llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt
index cc470a934426d54..0b37cebe3da6de8 100644
--- a/llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt
+++ b/llvm/unittests/Analysis/InlineOrderPlugin/CMakeLists.txt
@@ -2,7 +2,7 @@
 # libraries, but expects them to exist in the process loading the plugin. This 
 # doesn't work with DLLs on Windows (where a shared library can't have undefined
 # references), so just skip this testcase on Windows.
-if (NOT WIN32 AND NOT CYGWIN)
+if ((NOT WIN32 OR LLVM_BUILD_LLVM_DYLIB) AND NOT CYGWIN)
   unset(LLVM_LINK_COMPONENTS)
   add_llvm_library(InlineOrderPlugin MODULE BUILDTREE_ONLY
     InlineOrderPlugin.cpp
diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
index 346940384aff910..98f8989d4e6e9e9 100644
--- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
+++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
@@ -303,6 +303,9 @@ TEST_F(TargetLibraryInfoTest, ValidProto) {
       "declare double @erf(double)\n"
       "declare float @erff(float)\n"
       "declare x86_fp80 @erfl(x86_fp80)\n"
+      "declare double @tgamma(double)\n"
+      "declare float @tgammaf(float)\n"
+      "declare x86_fp80 @tgammal(x86_fp80)\n"
       "declare i32 @printf(i8*, ...)\n"
       "declare i32 @putc(i32, %struct*)\n"
       "declare i32 @putc_unlocked(i32, %struct*)\n"
diff --git a/llvm/unittests/CMakeLists.txt b/llvm/unittests/CMakeLists.txt
index 911ede701982f61..8892f3e75729ab3 100644
--- a/llvm/unittests/CMakeLists.txt
+++ b/llvm/unittests/CMakeLists.txt
@@ -14,6 +14,20 @@ function(add_llvm_target_unittest test_dir_name)
   add_llvm_unittest(${test_dir_name} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN})
 endfunction()
 
+# gtest macros like EXPECT_TRUE are expanded to single-line,
+# multi-statement code with if/else, e.g.:
+#   if (...)
+#     EXPECT_TRUE(...)
+# will be expanded into something like:
+#   if(...)
+#     switch (0) case 0: default: if (...) ; else return;;
+# GCC may emit false positive dangling-else warnings for such code.
+# However, adding braces to silence them goes against LLVM's style guide.
+# Disable the warning for GCC so that one can enable Werror.
+if (CMAKE_COMPILER_IS_GNUCXX)
+  list(APPEND LLVM_COMPILE_FLAGS "-Wno-dangling-else")
+endif ()
+
 add_subdirectory(ADT)
 add_subdirectory(Analysis)
 add_subdirectory(AsmParser)
diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
index 3df72ec8115b6aa..ffedb2c74220f04 100644
--- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
+++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "../lib/Target/AArch64/AArch64ISelLowering.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/AsmParser/Parser.h"
@@ -167,6 +168,18 @@ TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_EXTRACT_SUBVECTOR) {
   EXPECT_EQ(DAG->ComputeNumSignBits(Op, DemandedElts), 7u);
 }
 
+TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_VASHR) {
+  SDLoc Loc;
+  auto VecVT = MVT::v8i8;
+  auto Shift = DAG->getConstant(4, Loc, MVT::i32);
+  auto Vec0 = DAG->getConstant(1, Loc, VecVT);
+  auto Op1 = DAG->getNode(AArch64ISD::VASHR, Loc, VecVT, Vec0, Shift);
+  EXPECT_EQ(DAG->ComputeNumSignBits(Op1), 8u);
+  auto VecA = DAG->getConstant(0xaa, Loc, VecVT);
+  auto Op2 = DAG->getNode(AArch64ISD::VASHR, Loc, VecVT, VecA, Shift);
+  EXPECT_EQ(DAG->ComputeNumSignBits(Op2), 5u);
+}
+
 TEST_F(AArch64SelectionDAGTest, SimplifyDemandedVectorElts_EXTRACT_SUBVECTOR) {
   TargetLowering TL(*TM);
 
diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp
index 17fb18fc6b4d24b..2be656547c92e01 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp
@@ -174,6 +174,7 @@ TEST(DWARFDebugFrame, InvalidCFIOpcodesTest) {
       dwarf::DW_CFA_MIPS_advance_loc8,
       dwarf::DW_CFA_GNU_window_save,
       dwarf::DW_CFA_AARCH64_negate_ra_state,
+      dwarf::DW_CFA_AARCH64_negate_ra_state_with_pc,
       dwarf::DW_CFA_GNU_args_size};
 
   dwarf::CIE TestCIE = createCIE(/*IsDWARF64=*/false,
diff --git a/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt
index 82d277309307cb9..d1c7b799880a3b7 100644
--- a/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt
+++ b/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt
@@ -11,8 +11,9 @@ add_llvm_unittest(JITLinkTests
     AArch32Tests.cpp
     AArch32ErrorTests.cpp
     EHFrameSupportTests.cpp
-    JITLinkMocks.cpp
+    JITLinkTestUtils.cpp
     LinkGraphTests.cpp
+    MachOLinkGraphTests.cpp
     MemoryManagerErrorTests.cpp
     StubsTests.cpp
   )
diff --git a/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.cpp b/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.cpp
deleted file mode 100644
index c40ce7adb0b5ea4..000000000000000
--- a/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-//===--------- JITLinkMocks.cpp - Mock APIs for JITLink unit tests --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "JITLinkMocks.h"
-#include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h"
-
-#include "llvm/Testing/Support/Error.h"
-#include "gtest/gtest.h"
-
-using namespace llvm;
-using namespace llvm::orc;
-using namespace llvm::jitlink;
-
-void lookupResolveEverythingToNull(
-    const llvm::jitlink::JITLinkContext::LookupMap &Symbols,
-    std::unique_ptr<llvm::jitlink::JITLinkAsyncLookupContinuation> LC) {
-  llvm::orc::ExecutorAddr Null;
-  llvm::jitlink::AsyncLookupResult Result;
-  for (auto &KV : Symbols)
-    Result[KV.first] = {Null, llvm::JITSymbolFlags::Exported};
-  LC->run(std::move(Result));
-}
-
-void lookupErrorOut(
-    const llvm::jitlink::JITLinkContext::LookupMap &Symbols,
-    std::unique_ptr<llvm::jitlink::JITLinkAsyncLookupContinuation> LC) {
-  LC->run(llvm::make_error<llvm::StringError>("Lookup failed",
-                                              llvm::inconvertibleErrorCode()));
-}
-
-std::unique_ptr<MockJITLinkContext> makeMockContext(
-    llvm::unique_function<void(llvm::Error)> HandleFailed,
-    llvm::unique_function<void(MockJITLinkMemoryManager &)> SetupMemMgr,
-    llvm::unique_function<void(MockJITLinkContext &)> SetupContext) {
-  auto MemMgr = std::make_unique<MockJITLinkMemoryManager>();
-  SetupMemMgr(*MemMgr);
-  auto Ctx = std::make_unique<MockJITLinkContext>(std::move(MemMgr),
-                                                  std::move(HandleFailed));
-  SetupContext(*Ctx);
-  return Ctx;
-}
-
-void defaultMemMgrSetup(MockJITLinkMemoryManager &) {}
-void defaultCtxSetup(MockJITLinkContext &) {}
-
-TEST(JITLinkMocks, SmokeTest) {
-  // Check that the testing infrastructure defaults can "link" a graph
-  // successfully.
-  auto G = std::make_unique<LinkGraph>("foo", Triple("x86_64-apple-darwin"), 8,
-                                       llvm::endianness::little,
-                                       getGenericEdgeKindName);
-
-  ArrayRef<char> Content = "hello, world!";
-  auto &Sec =
-      G->createSection("__data", orc::MemProt::Read | orc::MemProt::Write);
-  orc::ExecutorAddr B1Addr(0x1000);
-  auto &B = G->createContentBlock(Sec, Content, B1Addr, 8, 0);
-  G->addDefinedSymbol(B, 4, "S", 4, Linkage::Strong, Scope::Default, false,
-                      false);
-
-  Error Err = Error::success();
-  auto Ctx =
-      makeMockContext(JoinErrorsInto(Err), defaultMemMgrSetup, defaultCtxSetup);
-
-  link_MachO_x86_64(std::move(G), std::move(Ctx));
-
-  EXPECT_THAT_ERROR(std::move(Err), Succeeded());
-}
diff --git a/llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.cpp b/llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.cpp
new file mode 100644
index 000000000000000..9a7878edab5045b
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.cpp
@@ -0,0 +1,114 @@
+//===------- JITLinkTestUtils.cpp - Utilities for JITLink unit tests ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITLinkTestUtils.h"
+#include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h"
+
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::orc;
+using namespace llvm::jitlink;
+
+static const char BlockContentBytes[] = {
+    0x54, 0x68, 0x65, 0x72, 0x65, 0x20, 0x77, 0x61, 0x73, 0x20, 0x6d, 0x6f,
+    0x76, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x20, 0x61, 0x74, 0x20, 0x74, 0x68,
+    0x65, 0x20, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2c, 0x20, 0x66,
+    0x6f, 0x72, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 0x6f, 0x72, 0x64, 0x20,
+    0x68, 0x61, 0x64, 0x20, 0x70, 0x61, 0x73, 0x73, 0x65, 0x64, 0x20, 0x61,
+    0x72, 0x6f, 0x75, 0x6e, 0x64, 0x0a, 0x54, 0x68, 0x61, 0x74, 0x20, 0x74,
+    0x68, 0x65, 0x20, 0x63, 0x6f, 0x6c, 0x74, 0x20, 0x66, 0x72, 0x6f, 0x6d,
+    0x20, 0x4f, 0x6c, 0x64, 0x20, 0x52, 0x65, 0x67, 0x72, 0x65, 0x74, 0x20,
+    0x68, 0x61, 0x64, 0x20, 0x67, 0x6f, 0x74, 0x20, 0x61, 0x77, 0x61, 0x79,
+    0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20, 0x68, 0x61, 0x64, 0x20, 0x6a, 0x6f,
+    0x69, 0x6e, 0x65, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 0x69, 0x6c,
+    0x64, 0x20, 0x62, 0x75, 0x73, 0x68, 0x20, 0x68, 0x6f, 0x72, 0x73, 0x65,
+    0x73, 0x20, 0x2d, 0x2d, 0x20, 0x68, 0x65, 0x20, 0x77, 0x61, 0x73, 0x20,
+    0x77, 0x6f, 0x72, 0x74, 0x68, 0x20, 0x61, 0x20, 0x74, 0x68, 0x6f, 0x75,
+    0x73, 0x61, 0x6e, 0x64, 0x20, 0x70, 0x6f, 0x75, 0x6e, 0x64, 0x2c, 0x0a,
+    0x53, 0x6f, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63,
+    0x72, 0x61, 0x63, 0x6b, 0x73, 0x20, 0x68, 0x61, 0x64, 0x20, 0x67, 0x61,
+    0x74, 0x68, 0x65, 0x72, 0x65, 0x64, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68,
+    0x65, 0x20, 0x66, 0x72, 0x61, 0x79, 0x2e, 0x0a, 0x41, 0x6c, 0x6c, 0x20,
+    0x74, 0x68, 0x65, 0x20, 0x74, 0x72, 0x69, 0x65, 0x64, 0x20, 0x61, 0x6e,
+    0x64, 0x20, 0x6e, 0x6f, 0x74, 0x65, 0x64, 0x20, 0x72, 0x69, 0x64, 0x65,
+    0x72, 0x73, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68, 0x65, 0x20,
+    0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x20, 0x6e, 0x65, 0x61,
+    0x72, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x66, 0x61, 0x72, 0x0a, 0x48, 0x61,
+    0x64, 0x20, 0x6d, 0x75, 0x73, 0x74, 0x65, 0x72, 0x65, 0x64, 0x20, 0x61,
+    0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x68, 0x6f, 0x6d, 0x65, 0x73, 0x74,
+    0x65, 0x61, 0x64, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x6e, 0x69, 0x67, 0x68,
+    0x74, 0x2c, 0x0a, 0x46, 0x6f, 0x72, 0x20, 0x74, 0x68, 0x65, 0x20, 0x62,
+    0x75, 0x73, 0x68, 0x6d, 0x65, 0x6e, 0x20, 0x6c, 0x6f, 0x76, 0x65, 0x20,
+    0x68, 0x61, 0x72, 0x64, 0x20, 0x72, 0x69, 0x64, 0x69, 0x6e, 0x67, 0x20,
+    0x77, 0x68, 0x65, 0x72, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 0x69,
+    0x6c, 0x64, 0x20, 0x62, 0x75, 0x73, 0x68, 0x20, 0x68, 0x6f, 0x72, 0x73,
+    0x65, 0x73, 0x20, 0x61, 0x72, 0x65, 0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20,
+    0x74, 0x68, 0x65, 0x20, 0x73, 0x74, 0x6f, 0x63, 0x6b, 0x2d, 0x68, 0x6f,
+    0x72, 0x73, 0x65, 0x20, 0x73, 0x6e, 0x75, 0x66, 0x66, 0x73, 0x20, 0x74,
+    0x68, 0x65, 0x20, 0x62, 0x61, 0x74, 0x74, 0x6c, 0x65, 0x20, 0x77, 0x69,
+    0x74, 0x68, 0x20, 0x64, 0x65, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x00};
+
+ArrayRef<char> BlockContent(BlockContentBytes);
+
+void lookupResolveEverythingToNull(
+    const llvm::jitlink::JITLinkContext::LookupMap &Symbols,
+    std::unique_ptr<llvm::jitlink::JITLinkAsyncLookupContinuation> LC) {
+  llvm::orc::ExecutorAddr Null;
+  llvm::jitlink::AsyncLookupResult Result;
+  for (auto &KV : Symbols)
+    Result[KV.first] = {Null, llvm::JITSymbolFlags::Exported};
+  LC->run(std::move(Result));
+}
+
+void lookupErrorOut(
+    const llvm::jitlink::JITLinkContext::LookupMap &Symbols,
+    std::unique_ptr<llvm::jitlink::JITLinkAsyncLookupContinuation> LC) {
+  LC->run(llvm::make_error<llvm::StringError>("Lookup failed",
+                                              llvm::inconvertibleErrorCode()));
+}
+
+std::unique_ptr<MockJITLinkContext> makeMockContext(
+    llvm::unique_function<void(llvm::Error)> HandleFailed,
+    llvm::unique_function<void(MockJITLinkMemoryManager &)> SetupMemMgr,
+    llvm::unique_function<void(MockJITLinkContext &)> SetupContext) {
+  auto MemMgr = std::make_unique<MockJITLinkMemoryManager>();
+  SetupMemMgr(*MemMgr);
+  auto Ctx = std::make_unique<MockJITLinkContext>(std::move(MemMgr),
+                                                  std::move(HandleFailed));
+  SetupContext(*Ctx);
+  return Ctx;
+}
+
+void defaultMemMgrSetup(MockJITLinkMemoryManager &) {}
+void defaultCtxSetup(MockJITLinkContext &) {}
+
+TEST(JITLinkMocks, SmokeTest) {
+  // Check that the testing infrastructure defaults can "link" a graph
+  // successfully.
+  auto G = std::make_unique<LinkGraph>("foo", Triple("x86_64-apple-darwin"), 8,
+                                       llvm::endianness::little,
+                                       getGenericEdgeKindName);
+
+  ArrayRef<char> Content = "hello, world!";
+  auto &Sec =
+      G->createSection("__data", orc::MemProt::Read | orc::MemProt::Write);
+  orc::ExecutorAddr B1Addr(0x1000);
+  auto &B = G->createContentBlock(Sec, Content, B1Addr, 8, 0);
+  G->addDefinedSymbol(B, 4, "S", 4, Linkage::Strong, Scope::Default, false,
+                      false);
+
+  Error Err = Error::success();
+  auto Ctx =
+      makeMockContext(JoinErrorsInto(Err), defaultMemMgrSetup, defaultCtxSetup);
+
+  link_MachO_x86_64(std::move(G), std::move(Ctx));
+
+  EXPECT_THAT_ERROR(std::move(Err), Succeeded());
+}
diff --git a/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.h b/llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.h
similarity index 95%
rename from llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.h
rename to llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.h
index 8c1e3ff2c77db58..dc077f900d19585 100644
--- a/llvm/unittests/ExecutionEngine/JITLink/JITLinkMocks.h
+++ b/llvm/unittests/ExecutionEngine/JITLink/JITLinkTestUtils.h
@@ -1,4 +1,4 @@
-//===----- JITLinkMocks.h - Mock APIs for JITLink unit tests ----*- C++ -*-===//
+//===--- JITLinkTestUtils.h - Utilities for JITLink unit tests --*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,12 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Mock APIs for JITLink unit tests.
+// Utilities for JITLink unit tests.
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_UNITTESTS_EXECUTIONENGINE_JITLINK_JITLINKMOCKS_H
-#define LLVM_UNITTESTS_EXECUTIONENGINE_JITLINK_JITLINKMOCKS_H
+#ifndef LLVM_UNITTESTS_EXECUTIONENGINE_JITLINK_JITLINKTESTUTILS_H
+#define LLVM_UNITTESTS_EXECUTIONENGINE_JITLINK_JITLINKTESTUTILS_H
 
 #include "llvm/ExecutionEngine/JITLink/JITLink.h"
 
@@ -225,4 +225,6 @@ class JoinErrorsInto {
   llvm::Error &Err;
 };
 
-#endif // LLVM_UNITTESTS_EXECUTIONENGINE_JITLINK_JITLINKMOCKS_H
+extern llvm::ArrayRef<char> BlockContent;
+
+#endif // LLVM_UNITTESTS_EXECUTIONENGINE_JITLINK_JITLINKTESTUTILS_H
diff --git a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp
index 5eea21184619522..32d917d75d5ca42 100644
--- a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp
+++ b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "JITLinkTestUtils.h"
+
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ExecutionEngine/JITLink/JITLink.h"
 #include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h"
@@ -17,47 +19,6 @@
 using namespace llvm;
 using namespace llvm::jitlink;
 
-static const char BlockContentBytes[] = {
-    0x54, 0x68, 0x65, 0x72, 0x65, 0x20, 0x77, 0x61, 0x73, 0x20, 0x6d, 0x6f,
-    0x76, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x20, 0x61, 0x74, 0x20, 0x74, 0x68,
-    0x65, 0x20, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2c, 0x20, 0x66,
-    0x6f, 0x72, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 0x6f, 0x72, 0x64, 0x20,
-    0x68, 0x61, 0x64, 0x20, 0x70, 0x61, 0x73, 0x73, 0x65, 0x64, 0x20, 0x61,
-    0x72, 0x6f, 0x75, 0x6e, 0x64, 0x0a, 0x54, 0x68, 0x61, 0x74, 0x20, 0x74,
-    0x68, 0x65, 0x20, 0x63, 0x6f, 0x6c, 0x74, 0x20, 0x66, 0x72, 0x6f, 0x6d,
-    0x20, 0x4f, 0x6c, 0x64, 0x20, 0x52, 0x65, 0x67, 0x72, 0x65, 0x74, 0x20,
-    0x68, 0x61, 0x64, 0x20, 0x67, 0x6f, 0x74, 0x20, 0x61, 0x77, 0x61, 0x79,
-    0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20, 0x68, 0x61, 0x64, 0x20, 0x6a, 0x6f,
-    0x69, 0x6e, 0x65, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 0x69, 0x6c,
-    0x64, 0x20, 0x62, 0x75, 0x73, 0x68, 0x20, 0x68, 0x6f, 0x72, 0x73, 0x65,
-    0x73, 0x20, 0x2d, 0x2d, 0x20, 0x68, 0x65, 0x20, 0x77, 0x61, 0x73, 0x20,
-    0x77, 0x6f, 0x72, 0x74, 0x68, 0x20, 0x61, 0x20, 0x74, 0x68, 0x6f, 0x75,
-    0x73, 0x61, 0x6e, 0x64, 0x20, 0x70, 0x6f, 0x75, 0x6e, 0x64, 0x2c, 0x0a,
-    0x53, 0x6f, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63,
-    0x72, 0x61, 0x63, 0x6b, 0x73, 0x20, 0x68, 0x61, 0x64, 0x20, 0x67, 0x61,
-    0x74, 0x68, 0x65, 0x72, 0x65, 0x64, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68,
-    0x65, 0x20, 0x66, 0x72, 0x61, 0x79, 0x2e, 0x0a, 0x41, 0x6c, 0x6c, 0x20,
-    0x74, 0x68, 0x65, 0x20, 0x74, 0x72, 0x69, 0x65, 0x64, 0x20, 0x61, 0x6e,
-    0x64, 0x20, 0x6e, 0x6f, 0x74, 0x65, 0x64, 0x20, 0x72, 0x69, 0x64, 0x65,
-    0x72, 0x73, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68, 0x65, 0x20,
-    0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x20, 0x6e, 0x65, 0x61,
-    0x72, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x66, 0x61, 0x72, 0x0a, 0x48, 0x61,
-    0x64, 0x20, 0x6d, 0x75, 0x73, 0x74, 0x65, 0x72, 0x65, 0x64, 0x20, 0x61,
-    0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x68, 0x6f, 0x6d, 0x65, 0x73, 0x74,
-    0x65, 0x61, 0x64, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x6e, 0x69, 0x67, 0x68,
-    0x74, 0x2c, 0x0a, 0x46, 0x6f, 0x72, 0x20, 0x74, 0x68, 0x65, 0x20, 0x62,
-    0x75, 0x73, 0x68, 0x6d, 0x65, 0x6e, 0x20, 0x6c, 0x6f, 0x76, 0x65, 0x20,
-    0x68, 0x61, 0x72, 0x64, 0x20, 0x72, 0x69, 0x64, 0x69, 0x6e, 0x67, 0x20,
-    0x77, 0x68, 0x65, 0x72, 0x65, 0x20, 0x74, 0x68, 0x65, 0x20, 0x77, 0x69,
-    0x6c, 0x64, 0x20, 0x62, 0x75, 0x73, 0x68, 0x20, 0x68, 0x6f, 0x72, 0x73,
-    0x65, 0x73, 0x20, 0x61, 0x72, 0x65, 0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20,
-    0x74, 0x68, 0x65, 0x20, 0x73, 0x74, 0x6f, 0x63, 0x6b, 0x2d, 0x68, 0x6f,
-    0x72, 0x73, 0x65, 0x20, 0x73, 0x6e, 0x75, 0x66, 0x66, 0x73, 0x20, 0x74,
-    0x68, 0x65, 0x20, 0x62, 0x61, 0x74, 0x74, 0x6c, 0x65, 0x20, 0x77, 0x69,
-    0x74, 0x68, 0x20, 0x64, 0x65, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x2e, 0x00};
-
-static ArrayRef<char> BlockContent(BlockContentBytes);
-
 TEST(LinkGraphTest, Construction) {
   // Check that LinkGraph construction works as expected.
   LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, llvm::endianness::little,
diff --git a/llvm/unittests/ExecutionEngine/JITLink/MachOLinkGraphTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/MachOLinkGraphTests.cpp
new file mode 100644
index 000000000000000..be922275be26f49
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/JITLink/MachOLinkGraphTests.cpp
@@ -0,0 +1,35 @@
+//===--- MachOLinkGraphTests.cpp - Unit tests for MachO LinkGraph utils ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITLinkTestUtils.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITLink/MachO.h"
+
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+
+TEST(MachOLinkGraphTest, GetStandardSections) {
+  // Check that the MachO default sections have the expected names and perms.
+  LinkGraph G("foo", Triple("arm64-apple-darwin"), 8, llvm::endianness::little,
+              getGenericEdgeKindName);
+
+  auto &Data = getMachODefaultRWDataSection(G);
+  EXPECT_TRUE(Data.empty());
+  EXPECT_EQ(Data.getName(), orc::MachODataDataSectionName);
+  EXPECT_EQ(Data.getMemProt(), orc::MemProt::Read | orc::MemProt::Write);
+
+  auto &Text = getMachODefaultTextSection(G);
+  EXPECT_TRUE(Text.empty());
+  EXPECT_EQ(Text.getName(), orc::MachOTextTextSectionName);
+  EXPECT_EQ(Text.getMemProt(), orc::MemProt::Read | orc::MemProt::Exec);
+}
diff --git a/llvm/unittests/ExecutionEngine/JITLink/MemoryManagerErrorTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/MemoryManagerErrorTests.cpp
index f0f3dd117c6f883..2b303f7a8c1a298 100644
--- a/llvm/unittests/ExecutionEngine/JITLink/MemoryManagerErrorTests.cpp
+++ b/llvm/unittests/ExecutionEngine/JITLink/MemoryManagerErrorTests.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "JITLinkMocks.h"
+#include "JITLinkTestUtils.h"
 #include "llvm/ExecutionEngine/JITLink/MachO_x86_64.h"
 
 #include "llvm/Testing/Support/Error.h"
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index fe04cbbce12dcd4..630cd03c688012c 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -27,6 +27,20 @@
 using namespace llvm;
 using namespace omp;
 
+// Wrapper lambdas to allow using EXPECT*()/ASSERT*() macros inside of
+// error-returning callbacks (ASSERT*() needs a void-returning function).
+#define FINICB_WRAPPER(cb)                                                     \
+  [&cb](InsertPointTy IP) -> Error {                                           \
+    cb(IP);                                                                    \
+    return Error::success();                                                   \
+  }
+
+#define BODYGENCB_WRAPPER(cb)                                                  \
+  [&cb](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) -> Error {            \
+    cb(AllocaIP, CodeGenIP);                                                   \
+    return Error::success();                                                   \
+  }
+
 namespace {
 
 /// Create an instruction that uses the values in \p Values. We use "printf"
@@ -218,9 +232,13 @@ class OpenMPIRBuilderTest : public testing::Test {
       CallInst *CallInst = createPrintfCall(Builder, "%d\\n", {LC});
       if (Call)
         *Call = CallInst;
+
+      return Error::success();
     };
-    CanonicalLoopInfo *Loop =
+    Expected<CanonicalLoopInfo *> LoopResult =
         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount);
+    assert(LoopResult && "unexpected error");
+    CanonicalLoopInfo *Loop = *LoopResult;
 
     // Finalize the function.
     Builder.restoreIP(Loop->getAfterIP());
@@ -327,14 +345,18 @@ TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
 
   IRBuilder<> Builder(BB);
 
-  OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for);
+  OpenMPIRBuilder::InsertPointOrErrorTy BarrierIP1 =
+      OMPBuilder.createBarrier({IRBuilder<>::InsertPoint()}, OMPD_for);
+  assert(BarrierIP1 && "unexpected error");
   EXPECT_TRUE(M->global_empty());
   EXPECT_EQ(M->size(), 1U);
   EXPECT_EQ(F->size(), 1U);
   EXPECT_EQ(BB->size(), 0U);
 
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
-  OMPBuilder.createBarrier(Loc, OMPD_for);
+  OpenMPIRBuilder::InsertPointOrErrorTy BarrierIP2 =
+      OMPBuilder.createBarrier(Loc, OMPD_for);
+  assert(BarrierIP2 && "unexpected error");
   EXPECT_FALSE(M->global_empty());
   EXPECT_EQ(M->size(), 3U);
   EXPECT_EQ(F->size(), 1U);
@@ -372,13 +394,15 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) {
     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
     BranchInst::Create(CBB, IP.getBlock());
   };
-  OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
+  OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true});
 
   IRBuilder<> Builder(BB);
 
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
-  auto NewIP = OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel);
-  Builder.restoreIP(NewIP);
+  OpenMPIRBuilder::InsertPointOrErrorTy NewIP =
+      OMPBuilder.createCancel(Loc, nullptr, OMPD_parallel);
+  assert(NewIP && "unexpected error");
+  Builder.restoreIP(*NewIP);
   EXPECT_FALSE(M->global_empty());
   EXPECT_EQ(M->size(), 4U);
   EXPECT_EQ(F->size(), 4U);
@@ -400,7 +424,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancel) {
   EXPECT_EQ(Cancel->getNumUses(), 1U);
   Instruction *CancelBBTI = Cancel->getParent()->getTerminator();
   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
-  EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP.getBlock());
+  EXPECT_EQ(CancelBBTI->getSuccessor(0), NewIP->getBlock());
   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
   EXPECT_NE(GTID1, nullptr);
@@ -439,13 +463,15 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
     BranchInst::Create(CBB, IP.getBlock());
   };
-  OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
+  OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true});
 
   IRBuilder<> Builder(BB);
 
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
-  auto NewIP = OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel);
-  Builder.restoreIP(NewIP);
+  OpenMPIRBuilder::InsertPointOrErrorTy NewIP =
+      OMPBuilder.createCancel(Loc, Builder.getTrue(), OMPD_parallel);
+  assert(NewIP && "unexpected error");
+  Builder.restoreIP(*NewIP);
   EXPECT_FALSE(M->global_empty());
   EXPECT_EQ(M->size(), 4U);
   EXPECT_EQ(F->size(), 7U);
@@ -473,7 +499,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelIfCond) {
   EXPECT_EQ(CancelBBTI->getNumSuccessors(), 2U);
   EXPECT_EQ(CancelBBTI->getSuccessor(0)->size(), 1U);
   EXPECT_EQ(CancelBBTI->getSuccessor(0)->getUniqueSuccessor(),
-            NewIP.getBlock());
+            NewIP->getBlock());
   EXPECT_EQ(CancelBBTI->getSuccessor(1)->size(), 3U);
   CallInst *GTID1 = dyn_cast<CallInst>(&CancelBBTI->getSuccessor(1)->front());
   EXPECT_NE(GTID1, nullptr);
@@ -512,13 +538,15 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
     ASSERT_EQ(IP.getBlock()->end(), IP.getPoint());
     BranchInst::Create(CBB, IP.getBlock());
   };
-  OMPBuilder.pushFinalizationCB({FiniCB, OMPD_parallel, true});
+  OMPBuilder.pushFinalizationCB({FINICB_WRAPPER(FiniCB), OMPD_parallel, true});
 
   IRBuilder<> Builder(BB);
 
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP()});
-  auto NewIP = OMPBuilder.createBarrier(Loc, OMPD_for);
-  Builder.restoreIP(NewIP);
+  OpenMPIRBuilder::InsertPointOrErrorTy NewIP =
+      OMPBuilder.createBarrier(Loc, OMPD_for);
+  assert(NewIP && "unexpected error");
+  Builder.restoreIP(*NewIP);
   EXPECT_FALSE(M->global_empty());
   EXPECT_EQ(M->size(), 3U);
   EXPECT_EQ(F->size(), 4U);
@@ -540,7 +568,7 @@ TEST_F(OpenMPIRBuilderTest, CreateCancelBarrier) {
   EXPECT_EQ(Barrier->getNumUses(), 1U);
   Instruction *BarrierBBTI = Barrier->getParent()->getTerminator();
   EXPECT_EQ(BarrierBBTI->getNumSuccessors(), 2U);
-  EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP.getBlock());
+  EXPECT_EQ(BarrierBBTI->getSuccessor(0), NewIP->getBlock());
   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->size(), 1U);
   EXPECT_EQ(BarrierBBTI->getSuccessor(1)->getTerminator()->getNumSuccessors(),
             1U);
@@ -563,7 +591,9 @@ TEST_F(OpenMPIRBuilderTest, DbgLoc) {
   IRBuilder<> Builder(BB);
 
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
-  OMPBuilder.createBarrier(Loc, OMPD_for);
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createBarrier(Loc, OMPD_for);
+  assert(AfterIP && "unexpected error");
   CallInst *GTID = dyn_cast<CallInst>(&BB->front());
   CallInst *Barrier = dyn_cast<CallInst>(GTID->getNextNode());
   EXPECT_EQ(GTID->getDebugLoc(), DL);
@@ -627,6 +657,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) {
     Instruction *ThenTerm, *ElseTerm;
     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
                                   &ThenTerm, &ElseTerm);
+    return Error::success();
   };
 
   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
@@ -654,19 +685,23 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) {
     return CodeGenIP;
   };
 
-  auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+  auto FiniCB = [&](InsertPointTy CodeGenIP) {
+    ++NumFinalizationPoints;
+    return Error::success();
+  };
 
   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
                                     F->getEntryBlock().getFirstInsertionPt());
-  IRBuilder<>::InsertPoint AfterIP =
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
+  assert(AfterIP && "unexpected error");
 
   EXPECT_EQ(NumBodiesGenerated, 1U);
   EXPECT_EQ(NumPrivatizedVars, 1U);
   EXPECT_EQ(NumFinalizationPoints, 1U);
 
-  Builder.restoreIP(AfterIP);
+  Builder.restoreIP(*AfterIP);
   Builder.CreateRetVoid();
 
   OMPBuilder.finalize();
@@ -735,6 +770,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
     Instruction *ThenTerm, *ElseTerm;
     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
                                   &ThenTerm, &ElseTerm);
+    return Error::success();
   };
 
   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
@@ -762,18 +798,22 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
     return CodeGenIP;
   };
 
-  auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+  auto FiniCB = [&](InsertPointTy CodeGenIP) {
+    ++NumFinalizationPoints;
+    return Error::success();
+  };
 
   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
                                     F->getEntryBlock().getFirstInsertionPt());
-  IRBuilder<>::InsertPoint AfterIP =
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
+  assert(AfterIP && "unexpected error");
   EXPECT_EQ(NumBodiesGenerated, 1U);
   EXPECT_EQ(NumPrivatizedVars, 1U);
   EXPECT_EQ(NumFinalizationPoints, 1U);
 
-  Builder.restoreIP(AfterIP);
+  Builder.restoreIP(*AfterIP);
   Builder.CreateRetVoid();
 
   OMPBuilder.finalize();
@@ -826,6 +866,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
 
   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     ++NumInnerBodiesGenerated;
+    return Error::success();
   };
 
   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
@@ -841,7 +882,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
     return CodeGenIP;
   };
 
-  auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+  auto FiniCB = [&](InsertPointTy CodeGenIP) {
+    ++NumFinalizationPoints;
+    return Error::success();
+  };
 
   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     ++NumOuterBodiesGenerated;
@@ -849,27 +893,29 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
     BasicBlock *CGBB = CodeGenIP.getBlock();
     BasicBlock *NewBB = SplitBlock(CGBB, &*CodeGenIP.getPoint());
     CGBB->getTerminator()->eraseFromParent();
-    ;
 
-    IRBuilder<>::InsertPoint AfterIP = OMPBuilder.createParallel(
+    OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createParallel(
         InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
+    assert(AfterIP && "unexpected error");
 
-    Builder.restoreIP(AfterIP);
+    Builder.restoreIP(*AfterIP);
     Builder.CreateBr(NewBB);
+    return Error::success();
   };
 
   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
                                     F->getEntryBlock().getFirstInsertionPt());
-  IRBuilder<>::InsertPoint AfterIP =
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
       OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
+  assert(AfterIP && "unexpected error");
 
   EXPECT_EQ(NumInnerBodiesGenerated, 1U);
   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
   EXPECT_EQ(NumFinalizationPoints, 2U);
 
-  Builder.restoreIP(AfterIP);
+  Builder.restoreIP(*AfterIP);
   Builder.CreateRetVoid();
 
   OMPBuilder.finalize();
@@ -920,6 +966,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
 
   auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     ++NumInnerBodiesGenerated;
+    return Error::success();
   };
 
   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
@@ -935,7 +982,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
     return CodeGenIP;
   };
 
-  auto FiniCB = [&](InsertPointTy CodeGenIP) { ++NumFinalizationPoints; };
+  auto FiniCB = [&](InsertPointTy CodeGenIP) {
+    ++NumFinalizationPoints;
+    return Error::success();
+  };
 
   auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     ++NumOuterBodiesGenerated;
@@ -948,32 +998,36 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
     NewBB1->getTerminator()->eraseFromParent();
     ;
 
-    IRBuilder<>::InsertPoint AfterIP1 = OMPBuilder.createParallel(
+    OpenMPIRBuilder::InsertPointOrErrorTy AfterIP1 = OMPBuilder.createParallel(
         InsertPointTy(CGBB, CGBB->end()), AllocaIP, InnerBodyGenCB, PrivCB,
         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
+    assert(AfterIP1 && "unexpected error");
 
-    Builder.restoreIP(AfterIP1);
+    Builder.restoreIP(*AfterIP1);
     Builder.CreateBr(NewBB1);
 
-    IRBuilder<>::InsertPoint AfterIP2 = OMPBuilder.createParallel(
+    OpenMPIRBuilder::InsertPointOrErrorTy AfterIP2 = OMPBuilder.createParallel(
         InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, InnerBodyGenCB, PrivCB,
         FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false);
+    assert(AfterIP2 && "unexpected error");
 
-    Builder.restoreIP(AfterIP2);
+    Builder.restoreIP(*AfterIP2);
     Builder.CreateBr(NewBB2);
+    return Error::success();
   };
 
   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
                                     F->getEntryBlock().getFirstInsertionPt());
-  IRBuilder<>::InsertPoint AfterIP =
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
       OMPBuilder.createParallel(Loc, AllocaIP, OuterBodyGenCB, PrivCB, FiniCB,
                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
+  assert(AfterIP && "unexpected error");
 
   EXPECT_EQ(NumInnerBodiesGenerated, 2U);
   EXPECT_EQ(NumOuterBodiesGenerated, 1U);
   EXPECT_EQ(NumFinalizationPoints, 3U);
 
-  Builder.restoreIP(AfterIP);
+  Builder.restoreIP(*AfterIP);
   Builder.CreateRetVoid();
 
   OMPBuilder.finalize();
@@ -1043,6 +1097,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
     Instruction *ThenTerm, *ElseTerm;
     SplitBlockAndInsertIfThenElse(Cmp, &*Builder.GetInsertPoint(), &ThenTerm,
                                   &ElseTerm);
+    return Error::success();
   };
 
   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
@@ -1073,20 +1128,22 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
   auto FiniCB = [&](InsertPointTy CodeGenIP) {
     ++NumFinalizationPoints;
     // No destructors.
+    return Error::success();
   };
 
   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
                                     F->getEntryBlock().getFirstInsertionPt());
-  IRBuilder<>::InsertPoint AfterIP =
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                 Builder.CreateIsNotNull(F->arg_begin()),
                                 nullptr, OMP_PROC_BIND_default, false);
+  assert(AfterIP && "unexpected error");
 
   EXPECT_EQ(NumBodiesGenerated, 1U);
   EXPECT_EQ(NumPrivatizedVars, 1U);
   EXPECT_EQ(NumFinalizationPoints, 1U);
 
-  Builder.restoreIP(AfterIP);
+  Builder.restoreIP(*AfterIP);
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
 
@@ -1141,8 +1198,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
     // Create three barriers, two cancel barriers but only one checked.
     Function *CBFn, *BFn;
 
-    Builder.restoreIP(
-        OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel));
+    OpenMPIRBuilder::InsertPointOrErrorTy BarrierIP1 =
+        OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel);
+    assert(BarrierIP1 && "unexpected error");
+    Builder.restoreIP(*BarrierIP1);
 
     CBFn = M->getFunction("__kmpc_cancel_barrier");
     BFn = M->getFunction("__kmpc_barrier");
@@ -1153,8 +1212,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
     ASSERT_EQ(CBFn->user_back()->getNumUses(), 1U);
     CheckedBarrier = cast<CallInst>(CBFn->user_back());
 
-    Builder.restoreIP(
-        OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true));
+    OpenMPIRBuilder::InsertPointOrErrorTy BarrierIP2 =
+        OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, true);
+    assert(BarrierIP2 && "unexpected error");
+    Builder.restoreIP(*BarrierIP2);
     CBFn = M->getFunction("__kmpc_cancel_barrier");
     BFn = M->getFunction("__kmpc_barrier");
     ASSERT_NE(CBFn, nullptr);
@@ -1164,8 +1225,10 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
     ASSERT_TRUE(isa<CallInst>(BFn->user_back()));
     ASSERT_EQ(BFn->user_back()->getNumUses(), 0U);
 
-    Builder.restoreIP(OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel,
-                                               false, false));
+    OpenMPIRBuilder::InsertPointOrErrorTy BarrierIP3 =
+        OMPBuilder.createBarrier(Builder.saveIP(), OMPD_parallel, false, false);
+    assert(BarrierIP3 && "unexpected error");
+    Builder.restoreIP(*BarrierIP3);
     ASSERT_EQ(CBFn->getNumUses(), 2U);
     ASSERT_EQ(BFn->getNumUses(), 1U);
     ASSERT_TRUE(CBFn->user_back() != CheckedBarrier);
@@ -1190,21 +1253,23 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
     Builder.restoreIP(IP);
     Builder.CreateCall(FakeDestructor,
                        {Builder.getInt32(NumFinalizationPoints)});
+    return Error::success();
   };
 
   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
                                     F->getEntryBlock().getFirstInsertionPt());
-  IRBuilder<>::InsertPoint AfterIP =
-      OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
-                                Builder.CreateIsNotNull(F->arg_begin()),
-                                nullptr, OMP_PROC_BIND_default, true);
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createParallel(
+      Loc, AllocaIP, BODYGENCB_WRAPPER(BodyGenCB), PrivCB, FiniCB,
+      Builder.CreateIsNotNull(F->arg_begin()), nullptr, OMP_PROC_BIND_default,
+      true);
+  assert(AfterIP && "unexpected error");
 
   EXPECT_EQ(NumBodiesGenerated, 1U);
   EXPECT_EQ(NumPrivatizedVars, 0U);
   EXPECT_EQ(NumFinalizationPoints, 2U);
   EXPECT_EQ(FakeDestructor->getNumUses(), 2U);
 
-  Builder.restoreIP(AfterIP);
+  Builder.restoreIP(*AfterIP);
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
 
@@ -1269,20 +1334,22 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
     Builder.CreateCall(TakeI32PtrFunc, I32PtrVal);
     Builder.CreateCall(TakeStructFunc, StructVal);
     Builder.CreateCall(TakeStructPtrFunc, StructPtrVal);
+    return Error::success();
   };
   auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
                     Value &Inner, Value *&ReplacementValue) {
     ReplacementValue = &Inner;
     return CodeGenIP;
   };
-  auto FiniCB = [](InsertPointTy) {};
+  auto FiniCB = [](InsertPointTy) { return Error::success(); };
 
   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
                                     F->getEntryBlock().getFirstInsertionPt());
-  IRBuilder<>::InsertPoint AfterIP =
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
       OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
                                 nullptr, nullptr, OMP_PROC_BIND_default, false);
-  Builder.restoreIP(AfterIP);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   Builder.CreateRetVoid();
 
   OMPBuilder.finalize();
@@ -1312,10 +1379,13 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
     Instruction *ThenTerm, *ElseTerm;
     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
                                   &ThenTerm, &ElseTerm);
+    return Error::success();
   };
 
-  CanonicalLoopInfo *Loop =
+  Expected<CanonicalLoopInfo *> LoopResult =
       OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount);
+  assert(LoopResult && "unexpected error");
+  CanonicalLoopInfo *Loop = *LoopResult;
 
   Builder.restoreIP(Loop->getAfterIP());
   ReturnInst *RetInst = Builder.CreateRetVoid();
@@ -1367,10 +1437,14 @@ TEST_F(OpenMPIRBuilderTest, CanonicalLoopBounds) {
     Value *StartVal = ConstantInt::get(LCTy, Start);
     Value *StopVal = ConstantInt::get(LCTy, Stop);
     Value *StepVal = ConstantInt::get(LCTy, Step);
-    auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
-    CanonicalLoopInfo *Loop =
+    auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
+      return Error::success();
+    };
+    Expected<CanonicalLoopInfo *> LoopResult =
         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
                                        StepVal, IsSigned, InclusiveStop);
+    assert(LoopResult && "unexpected error");
+    CanonicalLoopInfo *Loop = *LoopResult;
     Loop->assertOK();
     Builder.restoreIP(Loop->getAfterIP());
     Value *TripCount = Loop->getTripCount();
@@ -1463,16 +1537,22 @@ TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) {
                                   Value *InnerLC) {
       Builder.restoreIP(InnerCodeGenIP);
       Call = createPrintfCall(Builder, "body i=%d j=%d\\n", {OuterLC, InnerLC});
+      return Error::success();
     };
-    InnerLoop = OMPBuilder.createCanonicalLoop(
+    Expected<CanonicalLoopInfo *> LoopResult = OMPBuilder.createCanonicalLoop(
         Builder.saveIP(), InnerLoopBodyGenCB, InnerTripCount, "inner");
+    assert(LoopResult && "unexpected error");
+    InnerLoop = *LoopResult;
 
     Builder.restoreIP(InnerLoop->getAfterIP());
     InbetweenTrail =
         createPrintfCall(Builder, "In-between trail i=%d\\n", {OuterLC});
+    return Error::success();
   };
-  CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
+  Expected<CanonicalLoopInfo *> LoopResult = OMPBuilder.createCanonicalLoop(
       OuterLoc, OuterLoopBodyGenCB, OuterTripCount, "outer");
+  assert(LoopResult && "unexpected error");
+  CanonicalLoopInfo *OuterLoop = *LoopResult;
 
   // Finish the function.
   Builder.restoreIP(OuterLoop->getAfterIP());
@@ -1582,12 +1662,18 @@ TEST_F(OpenMPIRBuilderTest, TileNestedLoops) {
 
       // Add something that consumes the induction variables to the body.
       createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
+      return Error::success();
     };
-    InnerLoop = OMPBuilder.createCanonicalLoop(
+    Expected<CanonicalLoopInfo *> LoopResult = OMPBuilder.createCanonicalLoop(
         OuterCodeGenIP, InnerLoopBodyGenCB, TripCount, "inner");
+    assert(LoopResult && "unexpected error");
+    InnerLoop = *LoopResult;
+    return Error::success();
   };
-  CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
+  Expected<CanonicalLoopInfo *> LoopResult = OMPBuilder.createCanonicalLoop(
       Loc, OuterLoopBodyGenCB, TripCount, "outer");
+  assert(LoopResult && "unexpected error");
+  CanonicalLoopInfo *OuterLoop = *LoopResult;
 
   // Finalize the function.
   Builder.restoreIP(OuterLoop->getAfterIP());
@@ -1682,14 +1768,20 @@ TEST_F(OpenMPIRBuilderTest, TileNestedLoopsWithBounds) {
 
       // Add something that consumes the induction variable to the body.
       Call = createPrintfCall(Builder, "i=%d j=%d\\n", {OuterLC, InnerLC});
+      return Error::success();
     };
-    InnerLoop = OMPBuilder.createCanonicalLoop(
+    Expected<CanonicalLoopInfo *> LoopResult = OMPBuilder.createCanonicalLoop(
         OuterCodeGenIP, InnerLoopBodyGenCB, InnerStartVal, InnerStopVal,
         InnerStep, false, false, ComputeIP, "inner");
+    assert(LoopResult && "unexpected error");
+    InnerLoop = *LoopResult;
+    return Error::success();
   };
-  CanonicalLoopInfo *OuterLoop = OMPBuilder.createCanonicalLoop(
+  Expected<CanonicalLoopInfo *> LoopResult = OMPBuilder.createCanonicalLoop(
       Loc, OuterLoopBodyGenCB, OuterStartVal, OuterStopVal, OuterStep, false,
       false, ComputeIP, "outer");
+  assert(LoopResult && "unexpected error");
+  CanonicalLoopInfo *OuterLoop = *LoopResult;
 
   // Finalize the function
   Builder.restoreIP(OuterLoop->getAfterIP());
@@ -1793,10 +1885,14 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) {
     Value *StepVal = ConstantInt::get(LCTy, Step);
 
     // Generate a loop.
-    auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {};
-    CanonicalLoopInfo *Loop =
+    auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
+      return Error::success();
+    };
+    Expected<CanonicalLoopInfo *> LoopResult =
         OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, StartVal, StopVal,
                                        StepVal, IsSigned, InclusiveStop);
+    assert(LoopResult && "unexpected error");
+    CanonicalLoopInfo *Loop = *LoopResult;
     InsertPointTy AfterIP = Loop->getAfterIP();
 
     // Tile the loop.
@@ -2245,19 +2341,22 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkshareLoopTarget) {
   Value *StartVal = ConstantInt::get(LCTy, 10);
   Value *StopVal = ConstantInt::get(LCTy, 52);
   Value *StepVal = ConstantInt::get(LCTy, 2);
-  auto LoopBodyGen = [&](InsertPointTy, Value *) {};
+  auto LoopBodyGen = [&](InsertPointTy, Value *) { return Error::success(); };
 
-  CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
+  Expected<CanonicalLoopInfo *> LoopResult = OMPBuilder.createCanonicalLoop(
       Loc, LoopBodyGen, StartVal, StopVal, StepVal, false, false);
+  assert(LoopResult && "unexpected error");
+  CanonicalLoopInfo *CLI = *LoopResult;
   BasicBlock *Preheader = CLI->getPreheader();
   Value *TripCount = CLI->getTripCount();
 
   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
 
-  IRBuilder<>::InsertPoint AfterIP = OMPBuilder.applyWorkshareLoop(
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.applyWorkshareLoop(
       DL, CLI, AllocaIP, true, OMP_SCHEDULE_Static, nullptr, false, false,
       false, false, WorksharingLoopType::ForStaticLoop);
-  Builder.restoreIP(AfterIP);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   Builder.CreateRetVoid();
 
   OMPBuilder.finalize();
@@ -2306,11 +2405,15 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
   Value *StartVal = ConstantInt::get(LCTy, 10);
   Value *StopVal = ConstantInt::get(LCTy, 52);
   Value *StepVal = ConstantInt::get(LCTy, 2);
-  auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
+  auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {
+    return Error::success();
+  };
 
-  CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
+  Expected<CanonicalLoopInfo *> LoopResult = OMPBuilder.createCanonicalLoop(
       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
       /*IsSigned=*/false, /*InclusiveStop=*/false);
+  assert(LoopResult && "unexpected error");
+  CanonicalLoopInfo *CLI = *LoopResult;
   BasicBlock *Preheader = CLI->getPreheader();
   BasicBlock *Body = CLI->getBody();
   Value *IV = CLI->getIndVar();
@@ -2319,8 +2422,9 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) {
   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
   InsertPointTy AllocaIP = Builder.saveIP();
 
-  OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
-                                OMP_SCHEDULE_Static);
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.applyWorkshareLoop(
+      DL, CLI, AllocaIP, /*NeedsBarrier=*/true, OMP_SCHEDULE_Static);
+  assert(AfterIP && "unexpected error");
 
   BasicBlock *Cond = Body->getSinglePredecessor();
   Instruction *Cmp = &*Cond->begin();
@@ -2412,8 +2516,9 @@ TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) {
   Value *ChunkSize = ConstantInt::get(LCTy, 5);
   InsertPointTy AllocaIP{&F->getEntryBlock(),
                          F->getEntryBlock().getFirstInsertionPt()};
-  OMPBuilder.applyWorkshareLoop(DL, CLI, AllocaIP, /*NeedsBarrier=*/true,
-                                OMP_SCHEDULE_Static, ChunkSize);
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.applyWorkshareLoop(
+      DL, CLI, AllocaIP, /*NeedsBarrier=*/true, OMP_SCHEDULE_Static, ChunkSize);
+  assert(AfterIP && "unexpected error");
 
   OMPBuilder.finalize();
   EXPECT_FALSE(verifyModule(*M, &errs()));
@@ -2500,11 +2605,15 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
   Value *StepVal = ConstantInt::get(LCTy, 2);
   Value *ChunkVal =
       (ChunkSize == 1) ? nullptr : ConstantInt::get(LCTy, ChunkSize);
-  auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
+  auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {
+    return Error::success();
+  };
 
-  CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
+  Expected<CanonicalLoopInfo *> LoopResult = OMPBuilder.createCanonicalLoop(
       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
       /*IsSigned=*/false, /*InclusiveStop=*/false);
+  assert(LoopResult && "unexpected error");
+  CanonicalLoopInfo *CLI = *LoopResult;
 
   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
   InsertPointTy AllocaIP = Builder.saveIP();
@@ -2517,7 +2626,7 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
   BasicBlock *LatchBlock = CLI->getLatch();
   Value *IV = CLI->getIndVar();
 
-  InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop(
+  OpenMPIRBuilder::InsertPointOrErrorTy EndIP = OMPBuilder.applyWorkshareLoop(
       DL, CLI, AllocaIP, /*NeedsBarrier=*/true, getSchedKind(SchedType),
       ChunkVal, /*Simd=*/false,
       (SchedType & omp::OMPScheduleType::ModifierMonotonic) ==
@@ -2525,10 +2634,11 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
       (SchedType & omp::OMPScheduleType::ModifierNonmonotonic) ==
           omp::OMPScheduleType::ModifierNonmonotonic,
       /*Ordered=*/false);
+  assert(EndIP && "unexpected error");
 
   // The returned value should be the "after" point.
-  ASSERT_EQ(EndIP.getBlock(), AfterIP.getBlock());
-  ASSERT_EQ(EndIP.getPoint(), AfterIP.getPoint());
+  ASSERT_EQ(EndIP->getBlock(), AfterIP.getBlock());
+  ASSERT_EQ(EndIP->getPoint(), AfterIP.getPoint());
 
   auto AllocaIter = BB->begin();
   ASSERT_GE(std::distance(BB->begin(), BB->end()), 4);
@@ -2603,7 +2713,7 @@ TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) {
   EXPECT_EQ(NumCallsInExitBlock, 2u);
 
   // Add a termination to our block and check that it is internally consistent.
-  Builder.restoreIP(EndIP);
+  Builder.restoreIP(*EndIP);
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
   EXPECT_FALSE(verifyModule(*M, &errs()));
@@ -2642,11 +2752,15 @@ TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
   Value *StopVal = ConstantInt::get(LCTy, 52);
   Value *StepVal = ConstantInt::get(LCTy, 2);
   Value *ChunkVal = ConstantInt::get(LCTy, ChunkSize);
-  auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {};
+  auto LoopBodyGen = [&](InsertPointTy, llvm::Value *) {
+    return llvm::Error::success();
+  };
 
-  CanonicalLoopInfo *CLI = OMPBuilder.createCanonicalLoop(
+  Expected<CanonicalLoopInfo *> LoopResult = OMPBuilder.createCanonicalLoop(
       Loc, LoopBodyGen, StartVal, StopVal, StepVal,
       /*IsSigned=*/false, /*InclusiveStop=*/false);
+  assert(LoopResult && "unexpected error");
+  CanonicalLoopInfo *CLI = *LoopResult;
 
   Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
   InsertPointTy AllocaIP = Builder.saveIP();
@@ -2658,14 +2772,15 @@ TEST_F(OpenMPIRBuilderTest, DynamicWorkShareLoopOrdered) {
   BasicBlock *LatchBlock = CLI->getLatch();
   Value *IV = CLI->getIndVar();
 
-  InsertPointTy EndIP = OMPBuilder.applyWorkshareLoop(
+  OpenMPIRBuilder::InsertPointOrErrorTy EndIP = OMPBuilder.applyWorkshareLoop(
       DL, CLI, AllocaIP, /*NeedsBarrier=*/true, OMP_SCHEDULE_Static, ChunkVal,
       /*HasSimdModifier=*/false, /*HasMonotonicModifier=*/false,
       /*HasNonmonotonicModifier=*/false,
       /*HasOrderedClause=*/true);
+  assert(EndIP && "unexpected error");
 
   // Add a termination to our block and check that it is internally consistent.
-  Builder.restoreIP(EndIP);
+  Builder.restoreIP(*EndIP);
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
   EXPECT_FALSE(verifyModule(*M, &errs()));
@@ -2749,7 +2864,10 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) {
     EXPECT_NE(IPBB->end(), IP.getPoint());
   };
 
-  Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createMaster(
+      Builder, BODYGENCB_WRAPPER(BodyGenCB), FINICB_WRAPPER(FiniCB));
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   Value *EntryBBTI = EntryBB->getTerminator();
   EXPECT_NE(EntryBBTI, nullptr);
   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
@@ -2827,8 +2945,10 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) {
   };
 
   Constant *Filter = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-  Builder.restoreIP(
-      OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, Filter));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createMasked(
+      Builder, BODYGENCB_WRAPPER(BodyGenCB), FINICB_WRAPPER(FiniCB), Filter);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   Value *EntryBBTI = EntryBB->getTerminator();
   EXPECT_NE(EntryBBTI, nullptr);
   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
@@ -2893,8 +3013,11 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
   };
   BasicBlock *EntryBB = Builder.GetInsertBlock();
 
-  Builder.restoreIP(OMPBuilder.createCritical(Builder, BodyGenCB, FiniCB,
-                                              "testCRT", nullptr));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createCritical(Builder, BODYGENCB_WRAPPER(BodyGenCB),
+                                FINICB_WRAPPER(FiniCB), "testCRT", nullptr);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   CallInst *CriticalEntryCI = nullptr;
   for (auto &EI : *EntryBB) {
@@ -3141,8 +3264,11 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
 
   // Test for "#omp ordered [threads]"
   BasicBlock *EntryBB = Builder.GetInsertBlock();
-  Builder.restoreIP(
-      OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, true));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB),
+                                          FINICB_WRAPPER(FiniCB), true);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
@@ -3212,8 +3338,11 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
 
   // Test for "#omp ordered simd"
   BasicBlock *EntryBB = Builder.GetInsertBlock();
-  Builder.restoreIP(
-      OMPBuilder.createOrderedThreadsSimd(Builder, BodyGenCB, FiniCB, false));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createOrderedThreadsSimd(Builder, BODYGENCB_WRAPPER(BodyGenCB),
+                                          FINICB_WRAPPER(FiniCB), false);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
@@ -3326,8 +3455,11 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) {
     EXPECT_NE(IPBB->end(), IP.getPoint());
   };
 
-  Builder.restoreIP(
-      OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*IsNowait*/ false));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB),
+                              FINICB_WRAPPER(FiniCB), /*IsNowait*/ false);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   Value *EntryBBTI = EntryBB->getTerminator();
   EXPECT_NE(EntryBBTI, nullptr);
   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
@@ -3416,8 +3548,11 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) {
     EXPECT_NE(IPBB->end(), IP.getPoint());
   };
 
-  Builder.restoreIP(
-      OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB, /*IsNowait*/ true));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createSingle(Builder, BODYGENCB_WRAPPER(BodyGenCB),
+                              FINICB_WRAPPER(FiniCB), /*IsNowait*/ true);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   Value *EntryBBTI = EntryBB->getTerminator();
   EXPECT_NE(EntryBBTI, nullptr);
   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
@@ -3535,9 +3670,11 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) {
     EXPECT_NE(IPBB->end(), IP.getPoint());
   };
 
-  Builder.restoreIP(OMPBuilder.createSingle(Builder, BodyGenCB, FiniCB,
-                                            /*IsNowait*/ false, {CPVar},
-                                            {CopyFunc}));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createSingle(
+      Builder, BODYGENCB_WRAPPER(BodyGenCB), FINICB_WRAPPER(FiniCB),
+      /*IsNowait*/ false, {CPVar}, {CopyFunc});
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   Value *EntryBBTI = EntryBB->getTerminator();
   EXPECT_NE(EntryBBTI, nullptr);
   EXPECT_TRUE(isa<BranchInst>(EntryBBTI));
@@ -3798,8 +3935,10 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdate) {
     Sub = IRB.CreateSub(ConstVal, Atomic);
     return Sub;
   };
-  Builder.restoreIP(OMPBuilder.createAtomicUpdate(
-      Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createAtomicUpdate(
+      Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
   EXPECT_NE(ContTI, nullptr);
@@ -3865,8 +4004,10 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateFloat) {
     Sub = IRB.CreateFSub(ConstVal, Atomic);
     return Sub;
   };
-  Builder.restoreIP(OMPBuilder.createAtomicUpdate(
-      Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createAtomicUpdate(
+      Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
   EXPECT_NE(ContTI, nullptr);
@@ -3931,8 +4072,10 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicUpdateIntr) {
     Sub = IRB.CreateSub(ConstVal, Atomic);
     return Sub;
   };
-  Builder.restoreIP(OMPBuilder.createAtomicUpdate(
-      Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createAtomicUpdate(
+      Builder, AllocaIP, X, Expr, AO, RMWOp, UpdateOp, IsXLHSInRHSPart);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   BasicBlock *ContBB = EntryBB->getSingleSuccessor();
   BranchInst *ContTI = dyn_cast<BranchInst>(ContBB->getTerminator());
   EXPECT_NE(ContTI, nullptr);
@@ -4003,9 +4146,12 @@ TEST_F(OpenMPIRBuilderTest, OMPAtomicCapture) {
   // integer update - not used
   auto UpdateOp = [&](Value *Atomic, IRBuilder<> &IRB) { return nullptr; };
 
-  Builder.restoreIP(OMPBuilder.createAtomicCapture(
-      Builder, AllocaIP, X, V, Expr, AO, RMWOp, UpdateOp, UpdateExpr,
-      IsPostfixUpdate, IsXLHSInRHSPart));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createAtomicCapture(Builder, AllocaIP, X, V, Expr, AO, RMWOp,
+                                     UpdateOp, UpdateExpr, IsPostfixUpdate,
+                                     IsXLHSInRHSPart);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   EXPECT_EQ(EntryBB->getParent()->size(), 1U);
   AtomicRMWInst *ARWM = dyn_cast<AtomicRMWInst>(Init->getNextNode());
   EXPECT_NE(ARWM, nullptr);
@@ -4361,12 +4507,15 @@ TEST_F(OpenMPIRBuilderTest, CreateTeams) {
     Instruction *ThenTerm, *ElseTerm;
     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
                                   &ThenTerm, &ElseTerm);
+    return Error::success();
   };
 
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
-  Builder.restoreIP(OMPBuilder.createTeams(
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTeams(
       Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr,
-      /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));
+      /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   OMPBuilder.finalize();
   Builder.CreateRetVoid();
@@ -4423,14 +4572,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) {
   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     Builder.restoreIP(CodeGenIP);
     Builder.CreateCall(FakeFunction, {});
+    return Error::success();
   };
 
   // `F` has an argument - an integer, so we use that as the thread limit.
-  Builder.restoreIP(OMPBuilder.createTeams(/*=*/Builder, BodyGenCB,
-                                           /*NumTeamsLower=*/nullptr,
-                                           /*NumTeamsUpper=*/nullptr,
-                                           /*ThreadLimit=*/F->arg_begin(),
-                                           /*IfExpr=*/nullptr));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTeams(
+      /*=*/Builder, BodyGenCB, /*NumTeamsLower=*/nullptr,
+      /*NumTeamsUpper=*/nullptr, /*ThreadLimit=*/F->arg_begin(),
+      /*IfExpr=*/nullptr);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
@@ -4474,15 +4625,19 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) {
   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     Builder.restoreIP(CodeGenIP);
     Builder.CreateCall(FakeFunction, {});
+    return Error::success();
   };
 
   // `F` already has an integer argument, so we use that as upper bound to
   // `num_teams`
-  Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB,
-                                           /*NumTeamsLower=*/nullptr,
-                                           /*NumTeamsUpper=*/F->arg_begin(),
-                                           /*ThreadLimit=*/nullptr,
-                                           /*IfExpr=*/nullptr));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createTeams(Builder, BodyGenCB,
+                             /*NumTeamsLower=*/nullptr,
+                             /*NumTeamsUpper=*/F->arg_begin(),
+                             /*ThreadLimit=*/nullptr,
+                             /*IfExpr=*/nullptr);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
@@ -4531,13 +4686,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) {
   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     Builder.restoreIP(CodeGenIP);
     Builder.CreateCall(FakeFunction, {});
+    return Error::success();
   };
 
   // `F` already has an integer argument, so we use that as upper bound to
   // `num_teams`
-  Builder.restoreIP(
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
       OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper,
-                             /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr));
+                             /*ThreadLimit=*/nullptr, /*IfExpr=*/nullptr);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
@@ -4593,11 +4751,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) {
   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     Builder.restoreIP(CodeGenIP);
     Builder.CreateCall(FakeFunction, {});
+    return Error::success();
   };
 
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
-  Builder.restoreIP(OMPBuilder.createTeams(
-      Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, ThreadLimit, nullptr));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTeams(
+      Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, ThreadLimit, nullptr);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
@@ -4644,13 +4805,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) {
   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     Builder.restoreIP(CodeGenIP);
     Builder.CreateCall(FakeFunction, {});
+    return Error::success();
   };
 
   // `F` already has an integer argument, so we use that as upper bound to
   // `num_teams`
-  Builder.restoreIP(OMPBuilder.createTeams(
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTeams(
       Builder, BodyGenCB, /*NumTeamsLower=*/nullptr, /*NumTeamsUpper=*/nullptr,
-      /*ThreadLimit=*/nullptr, IfExpr));
+      /*ThreadLimit=*/nullptr, IfExpr);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
@@ -4707,12 +4871,15 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) {
   auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
     Builder.restoreIP(CodeGenIP);
     Builder.CreateCall(FakeFunction, {});
+    return Error::success();
   };
 
   // `F` already has an integer argument, so we use that as upper bound to
   // `num_teams`
-  Builder.restoreIP(OMPBuilder.createTeams(Builder, BodyGenCB, NumTeamsLower,
-                                           NumTeamsUpper, ThreadLimit, IfExpr));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTeams(
+      Builder, BodyGenCB, NumTeamsLower, NumTeamsUpper, ThreadLimit, IfExpr);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
@@ -4937,6 +5104,7 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
 
     BodyIP = Builder.saveIP();
     BodyAllocaIP = InnerAllocaIP;
+    return Error::success();
   };
 
   // Privatization for reduction creates local copies of reduction variables and
@@ -4969,14 +5137,15 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
   };
 
   // Do nothing in finalization.
-  auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; };
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); };
 
-  InsertPointTy AfterIP =
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
       OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB,
                                 /* IfCondition */ nullptr,
                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
                                 /* IsCancellable */ false);
-  Builder.restoreIP(AfterIP);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   OpenMPIRBuilder::ReductionInfo ReductionInfos[] = {
       {SumType, SumReduced, SumPrivatized,
@@ -4989,10 +5158,12 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
 
   bool ReduceVariableByRef[] = {false, false};
 
-  OMPBuilder.createReductions(BodyIP, BodyAllocaIP, ReductionInfos,
-                              ReduceVariableByRef);
+  OpenMPIRBuilder::InsertPointOrErrorTy ReductionsIP =
+      OMPBuilder.createReductions(BodyIP, BodyAllocaIP, ReductionInfos,
+                                  ReduceVariableByRef);
+  assert(ReductionsIP && "unexpected error");
 
-  Builder.restoreIP(AfterIP);
+  Builder.restoreIP(*AfterIP);
   Builder.CreateRetVoid();
 
   OMPBuilder.finalize(F);
@@ -5172,6 +5343,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
 
     FirstBodyIP = Builder.saveIP();
     FirstBodyAllocaIP = InnerAllocaIP;
+    return Error::success();
   };
 
   InsertPointTy SecondBodyIP, SecondBodyAllocaIP;
@@ -5190,6 +5362,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
 
     SecondBodyIP = Builder.saveIP();
     SecondBodyAllocaIP = InnerAllocaIP;
+    return Error::success();
   };
 
   // Privatization for reduction creates local copies of reduction variables and
@@ -5224,36 +5397,44 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
   };
 
   // Do nothing in finalization.
-  auto FiniCB = [&](InsertPointTy CodeGenIP) { return CodeGenIP; };
+  auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); };
 
-  Builder.restoreIP(
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP1 =
       OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB,
                                 FiniCB, /* IfCondition */ nullptr,
                                 /* NumThreads */ nullptr, OMP_PROC_BIND_default,
-                                /* IsCancellable */ false));
-  InsertPointTy AfterIP = OMPBuilder.createParallel(
+                                /* IsCancellable */ false);
+  assert(AfterIP1 && "unexpected error");
+  Builder.restoreIP(*AfterIP1);
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP2 = OMPBuilder.createParallel(
       {Builder.saveIP(), DL}, OuterAllocaIP, SecondBodyGenCB, PrivCB, FiniCB,
       /* IfCondition */ nullptr,
       /* NumThreads */ nullptr, OMP_PROC_BIND_default,
       /* IsCancellable */ false);
+  assert(AfterIP2 && "unexpected error");
+  Builder.restoreIP(*AfterIP2);
 
   OMPBuilder.Config.setIsGPU(false);
   bool ReduceVariableByRef[] = {false};
 
-  OMPBuilder.createReductions(
-      FirstBodyIP, FirstBodyAllocaIP,
-      {{SumType, SumReduced, SumPrivatized,
-        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
-        /*ReductionGenClang=*/nullptr, sumAtomicReduction}},
-      ReduceVariableByRef);
-  OMPBuilder.createReductions(
-      SecondBodyIP, SecondBodyAllocaIP,
-      {{XorType, XorReduced, XorPrivatized,
-        /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction,
-        /*ReductionGenClang=*/nullptr, xorAtomicReduction}},
-      ReduceVariableByRef);
-
-  Builder.restoreIP(AfterIP);
+  OpenMPIRBuilder::InsertPointOrErrorTy ReductionsIP1 =
+      OMPBuilder.createReductions(
+          FirstBodyIP, FirstBodyAllocaIP,
+          {{SumType, SumReduced, SumPrivatized,
+            /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
+            /*ReductionGenClang=*/nullptr, sumAtomicReduction}},
+          ReduceVariableByRef);
+  assert(ReductionsIP1 && "unexpected error");
+  OpenMPIRBuilder::InsertPointOrErrorTy ReductionsIP2 =
+      OMPBuilder.createReductions(
+          SecondBodyIP, SecondBodyAllocaIP,
+          {{XorType, XorReduced, XorPrivatized,
+            /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, xorReduction,
+            /*ReductionGenClang=*/nullptr, xorAtomicReduction}},
+          ReduceVariableByRef);
+  assert(ReductionsIP2 && "unexpected error");
+
+  Builder.restoreIP(*AfterIP2);
   Builder.CreateRetVoid();
 
   OMPBuilder.finalize(F);
@@ -5320,8 +5501,10 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) {
   llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
   llvm::SmallVector<BasicBlock *, 4> CaseBBs;
 
-  auto FiniCB = [&](InsertPointTy IP) {};
-  auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
+  auto FiniCB = [&](InsertPointTy IP) { return Error::success(); };
+  auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+    return Error::success();
+  };
   SectionCBVector.push_back(SectionCB);
 
   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
@@ -5329,8 +5512,10 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) {
                    llvm::Value *&ReplVal) { return CodeGenIP; };
   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
                                     F->getEntryBlock().getFirstInsertionPt());
-  Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
-                                              PrivCB, FiniCB, false, false));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createSections(
+      Loc, AllocaIP, SectionCBVector, PrivCB, FiniCB, false, false);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   Builder.CreateRetVoid(); // Required at the end of the function
   EXPECT_NE(F->getEntryBlock().getTerminator(), nullptr);
   EXPECT_FALSE(verifyModule(*M, &errs()));
@@ -5371,6 +5556,7 @@ TEST_F(OpenMPIRBuilderTest, CreateSections) {
     Value *PrivLoad =
         Builder.CreateLoad(F->arg_begin()->getType(), PrivAI, "local.alloca");
     Builder.CreateICmpNE(F->arg_begin(), PrivLoad);
+    return Error::success();
   };
   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                    llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
@@ -5383,8 +5569,11 @@ TEST_F(OpenMPIRBuilderTest, CreateSections) {
 
   IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
                                     F->getEntryBlock().getFirstInsertionPt());
-  Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
-                                              PrivCB, FiniCB, false, false));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector, PrivCB,
+                                FINICB_WRAPPER(FiniCB), false, false);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   Builder.CreateRetVoid(); // Required at the end of the function
 
   // Switch BB's predecessor is loop condition BB, whose successor at index 1 is
@@ -5468,10 +5657,12 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsNoWait) {
   auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                    llvm::Value &, llvm::Value &Val,
                    llvm::Value *&ReplVal) { return CodeGenIP; };
-  auto FiniCB = [&](InsertPointTy IP) {};
+  auto FiniCB = [&](InsertPointTy IP) { return Error::success(); };
 
-  Builder.restoreIP(OMPBuilder.createSections(Loc, AllocaIP, SectionCBVector,
-                                              PrivCB, FiniCB, false, true));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createSections(
+      Loc, AllocaIP, SectionCBVector, PrivCB, FiniCB, false, true);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   Builder.CreateRetVoid(); // Required at the end of the function
   for (auto &Inst : instructions(*F)) {
     EXPECT_FALSE(isa<CallInst>(Inst) &&
@@ -5692,9 +5883,11 @@ TEST_F(OpenMPIRBuilderTest, TargetEnterData) {
   OMPBuilder.Config.setIsGPU(true);
 
   llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_begin_mapper;
-  Builder.restoreIP(OMPBuilder.createTargetData(
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTargetData(
       Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
-      /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc));
+      /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
   EXPECT_NE(TargetDataCall, nullptr);
@@ -5751,9 +5944,11 @@ TEST_F(OpenMPIRBuilderTest, TargetExitData) {
   OMPBuilder.Config.setIsGPU(true);
 
   llvm::omp::RuntimeFunction RTLFunc = OMPRTL___tgt_target_data_end_mapper;
-  Builder.restoreIP(OMPBuilder.createTargetData(
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTargetData(
       Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
-      /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc));
+      /* IfCond= */ nullptr, Info, GenMapInfoCB, &RTLFunc);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
   EXPECT_NE(TargetDataCall, nullptr);
@@ -5859,9 +6054,12 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) {
     return Builder.saveIP();
   };
 
-  Builder.restoreIP(OMPBuilder.createTargetData(
-      Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
-      /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB));
+  OpenMPIRBuilder::InsertPointOrErrorTy TargetDataIP1 =
+      OMPBuilder.createTargetData(
+          Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
+          /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyCB);
+  assert(TargetDataIP1 && "unexpected error");
+  Builder.restoreIP(*TargetDataIP1);
 
   CallInst *TargetDataCall = dyn_cast<CallInst>(&BB->back());
   EXPECT_NE(TargetDataCall, nullptr);
@@ -5884,9 +6082,12 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) {
     EXPECT_EQ(TargetDataCall, nullptr);
     return Builder.saveIP();
   };
-  Builder.restoreIP(OMPBuilder.createTargetData(
-      Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
-      /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyTargetCB));
+  OpenMPIRBuilder::InsertPointOrErrorTy TargetDataIP2 =
+      OMPBuilder.createTargetData(
+          Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
+          /* IfCond= */ nullptr, Info, GenMapInfoCB, nullptr, BodyTargetCB);
+  assert(TargetDataIP2 && "unexpected error");
+  Builder.restoreIP(*TargetDataIP2);
   EXPECT_TRUE(CheckDevicePassBodyGen);
 
   Builder.CreateRetVoid();
@@ -5981,9 +6182,11 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
 
   TargetRegionEntryInfo EntryInfo("func", 42, 4711, 17);
   OpenMPIRBuilder::LocationDescription OmpLoc({Builder.saveIP(), DL});
-  Builder.restoreIP(OMPBuilder.createTarget(
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTarget(
       OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), Builder.saveIP(),
-      EntryInfo, -1, 0, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB));
+      EntryInfo, -1, 0, Inputs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   OMPBuilder.finalize();
   Builder.CreateRetVoid();
 
@@ -6089,11 +6292,13 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
                                   /*Line=*/3, /*Count=*/0);
 
-  Builder.restoreIP(
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
       OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
                               EntryInfo, /*NumTeams=*/-1,
                               /*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
-                              BodyGenCB, SimpleArgAccessorCB));
+                              BodyGenCB, SimpleArgAccessorCB);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
@@ -6238,11 +6443,13 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
   TargetRegionEntryInfo EntryInfo("parent", /*DeviceID=*/1, /*FileID=*/2,
                                   /*Line=*/3, /*Count=*/0);
 
-  Builder.restoreIP(
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
       OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
                               EntryInfo, /*NumTeams=*/-1,
                               /*NumThreads=*/0, CapturedArgs, GenMapInfoCB,
-                              BodyGenCB, SimpleArgAccessorCB));
+                              BodyGenCB, SimpleArgAccessorCB);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
 
   Builder.CreateRetVoid();
   OMPBuilder.finalize();
@@ -6354,15 +6561,17 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) {
     Instruction *ThenTerm, *ElseTerm;
     SplitBlockAndInsertIfThenElse(Cmp, CodeGenIP.getBlock()->getTerminator(),
                                   &ThenTerm, &ElseTerm);
+    return Error::success();
   };
 
   BasicBlock *AllocaBB = Builder.GetInsertBlock();
   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
   OpenMPIRBuilder::LocationDescription Loc(
       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
-  Builder.restoreIP(OMPBuilder.createTask(
-      Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
-      BodyGenCB));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTask(
+      Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   OMPBuilder.finalize();
   Builder.CreateRetVoid();
 
@@ -6460,15 +6669,18 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
   F->setName("func");
   IRBuilder<> Builder(BB);
 
-  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+    return Error::success();
+  };
 
   BasicBlock *AllocaBB = Builder.GetInsertBlock();
   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
   OpenMPIRBuilder::LocationDescription Loc(
       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
-  Builder.restoreIP(OMPBuilder.createTask(
-      Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
-      BodyGenCB));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTask(
+      Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   OMPBuilder.finalize();
   Builder.CreateRetVoid();
 
@@ -6490,14 +6702,18 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
-  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+    return Error::success();
+  };
   BasicBlock *AllocaBB = Builder.GetInsertBlock();
   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
   OpenMPIRBuilder::LocationDescription Loc(
       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
-  Builder.restoreIP(OMPBuilder.createTask(
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTask(
       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB,
-      /*Tied=*/false));
+      /*Tied=*/false);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   OMPBuilder.finalize();
   Builder.CreateRetVoid();
 
@@ -6520,7 +6736,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
-  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+    return Error::success();
+  };
   BasicBlock *AllocaBB = Builder.GetInsertBlock();
   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
   OpenMPIRBuilder::LocationDescription Loc(
@@ -6532,9 +6750,11 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
                                      Type::getInt32Ty(M->getContext()), InDep);
     DDS.push_back(DDIn);
   }
-  Builder.restoreIP(OMPBuilder.createTask(
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTask(
       Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB,
-      /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS));
+      /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   OMPBuilder.finalize();
   Builder.CreateRetVoid();
 
@@ -6594,7 +6814,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
-  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+    return Error::success();
+  };
   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
   Builder.SetInsertPoint(BodyBB);
@@ -6602,8 +6824,11 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
-  Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
-                                          /*Tied=*/false, Final));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
+                            /*Tied=*/false, Final);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   OMPBuilder.finalize();
   Builder.CreateRetVoid();
 
@@ -6648,7 +6873,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
-  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
+  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+    return Error::success();
+  };
   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
   IRBuilderBase::InsertPoint AllocaIP = Builder.saveIP();
   Builder.SetInsertPoint(BodyBB);
@@ -6656,9 +6883,11 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
       CmpInst::Predicate::ICMP_EQ, F->getArg(0),
       ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
   OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
-  Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
-                                          /*Tied=*/false, /*Final=*/nullptr,
-                                          IfCondition));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+      OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
+                            /*Tied=*/false, /*Final=*/nullptr, IfCondition);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   OMPBuilder.finalize();
   Builder.CreateRetVoid();
 
@@ -6742,15 +6971,17 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
     SplitBlockAndInsertIfThenElse(InternalIfCmp,
                                   CodeGenIP.getBlock()->getTerminator(),
                                   &ThenTerm, &ElseTerm);
+    return Error::success();
   };
 
   BasicBlock *AllocaBB = Builder.GetInsertBlock();
   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
   OpenMPIRBuilder::LocationDescription Loc(
       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
-  Builder.restoreIP(OMPBuilder.createTaskgroup(
-      Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
-      BodyGenCB));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTaskgroup(
+      Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   OMPBuilder.finalize();
   Builder.CreateRetVoid();
 
@@ -6823,9 +7054,13 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
           Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64);
       Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt64(64));
       Builder.CreateStore(AddInst, Alloca64);
+      return Error::success();
     };
     OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
-    Builder.restoreIP(OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1));
+    OpenMPIRBuilder::InsertPointOrErrorTy TaskIP1 =
+        OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1);
+    assert(TaskIP1 && "unexpected error");
+    Builder.restoreIP(*TaskIP1);
 
     auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
       Builder.restoreIP(CodeGenIP);
@@ -6833,18 +7068,24 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
           Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32);
       Value *AddInst = Builder.CreateAdd(LoadValue, Builder.getInt32(32));
       Builder.CreateStore(AddInst, Alloca32);
+      return Error::success();
     };
     OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL);
-    Builder.restoreIP(OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2));
+    OpenMPIRBuilder::InsertPointOrErrorTy TaskIP2 =
+        OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2);
+    assert(TaskIP2 && "unexpected error");
+    Builder.restoreIP(*TaskIP2);
+    return Error::success();
   };
 
   BasicBlock *AllocaBB = Builder.GetInsertBlock();
   BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
   OpenMPIRBuilder::LocationDescription Loc(
       InsertPointTy(BodyBB, BodyBB->getFirstInsertionPt()), DL);
-  Builder.restoreIP(OMPBuilder.createTaskgroup(
-      Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
-      BodyGenCB));
+  OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = OMPBuilder.createTaskgroup(
+      Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), BodyGenCB);
+  assert(AfterIP && "unexpected error");
+  Builder.restoreIP(*AfterIP);
   OMPBuilder.finalize();
   Builder.CreateRetVoid();
 
diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp
index 64e66aa5f97a6d0..81c17120a1f6fff 100644
--- a/llvm/unittests/IR/StructuralHashTest.cpp
+++ b/llvm/unittests/IR/StructuralHashTest.cpp
@@ -10,6 +10,7 @@
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/SourceMgr.h"
+#include "gmock/gmock-matchers.h"
 #include "gtest/gtest.h"
 
 #include <memory>
@@ -18,6 +19,11 @@ using namespace llvm;
 
 namespace {
 
+using testing::Contains;
+using testing::Key;
+using testing::Pair;
+using testing::SizeIs;
+
 std::unique_ptr<Module> parseIR(LLVMContext &Context, const char *IR) {
   SMDiagnostic Err;
   std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Context);
@@ -239,4 +245,59 @@ TEST(StructuralHashTest, ArgumentNumber) {
   EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
   EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
 }
+
+TEST(StructuralHashTest, Differences) {
+  LLVMContext Ctx;
+  std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n"
+                                            "  %c = add i64 %a, 1\n"
+                                            "  %b = call i64 @f1(i64 %c)\n"
+                                            "  ret i64 %b\n"
+                                            "}\n"
+                                            "declare i64 @f1(i64)");
+  auto *F1 = M1->getFunction("f");
+  std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @g(i64 %a) {\n"
+                                            "  %c = add i64 %a, 1\n"
+                                            "  %b = call i64 @f2(i64 %c)\n"
+                                            "  ret i64 %b\n"
+                                            "}\n"
+                                            "declare i64 @f2(i64)");
+  auto *F2 = M2->getFunction("g");
+
+  // They are originally different when not ignoring any operand.
+  EXPECT_NE(StructuralHash(*F1, true), StructuralHash(*F2, true));
+  EXPECT_NE(StructuralHashWithDifferences(*F1, nullptr).FunctionHash,
+            StructuralHashWithDifferences(*F2, nullptr).FunctionHash);
+
+  // When we ignore the call target f1 vs f2, they have the same hash.
+  auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) {
+    return I->getOpcode() == Instruction::Call && OpndIdx == 1;
+  };
+  auto FuncHashInfo1 = StructuralHashWithDifferences(*F1, IgnoreOp);
+  auto FuncHashInfo2 = StructuralHashWithDifferences(*F2, IgnoreOp);
+  EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash);
+
+  // There are 3 instructions in total.
+  EXPECT_THAT(*FuncHashInfo1.IndexInstruction, SizeIs(3));
+  EXPECT_THAT(*FuncHashInfo2.IndexInstruction, SizeIs(3));
+
+  // Only 1 operand (the call target) has been ignored.
+  EXPECT_THAT(*FuncHashInfo1.IndexOperandHashMap, SizeIs(1u));
+  EXPECT_THAT(*FuncHashInfo2.IndexOperandHashMap, SizeIs(1u));
+
+  // The index pair of instruction and operand (1, 1) is a key in the map.
+  ASSERT_THAT(*FuncHashInfo1.IndexOperandHashMap, Contains(Key(Pair(1, 1))));
+  ASSERT_THAT(*FuncHashInfo2.IndexOperandHashMap, Contains(Key(Pair(1, 1))));
+
+  // The indexed instruction must be the call instruction as shown in the
+  // IgnoreOp above.
+  EXPECT_EQ(FuncHashInfo1.IndexInstruction->lookup(1)->getOpcode(),
+            Instruction::Call);
+  EXPECT_EQ(FuncHashInfo2.IndexInstruction->lookup(1)->getOpcode(),
+            Instruction::Call);
+
+  // The ignored operand hashes (for f1 vs. f2) are different.
+  EXPECT_NE(FuncHashInfo1.IndexOperandHashMap->lookup({1, 1}),
+            FuncHashInfo2.IndexOperandHashMap->lookup({1, 1}));
+}
+
 } // end anonymous namespace
diff --git a/llvm/unittests/IR/VerifierTest.cpp b/llvm/unittests/IR/VerifierTest.cpp
index 91cd35a10e9b926..462578a34da837d 100644
--- a/llvm/unittests/IR/VerifierTest.cpp
+++ b/llvm/unittests/IR/VerifierTest.cpp
@@ -385,5 +385,35 @@ TEST(VerifierTest, AtomicRMW) {
       << Error;
 }
 
+TEST(VerifierTest, GetElementPtrInst) {
+  LLVMContext C;
+  Module M("M", C);
+  FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false);
+  Function *F = Function::Create(FTy, Function::ExternalLinkage, "foo", M);
+  BasicBlock *Entry = BasicBlock::Create(C, "entry", F);
+  ReturnInst *RI = ReturnInst::Create(C, Entry);
+
+  FixedVectorType *V2P1Ty = FixedVectorType::get(PointerType::get(C, 1), 2);
+  FixedVectorType *V2P2Ty = FixedVectorType::get(PointerType::get(C, 2), 2);
+
+  Instruction *GEPVec = GetElementPtrInst::Create(
+      Type::getInt8Ty(C), ConstantAggregateZero::get(V2P1Ty),
+      {ConstantVector::getSplat(ElementCount::getFixed(2),
+                                ConstantInt::get(Type::getInt64Ty(C), 0))},
+      Entry);
+
+  GEPVec->insertBefore(RI);
+
+  // Break the address space of the source value
+  GEPVec->getOperandUse(0).set(ConstantAggregateZero::get(V2P2Ty));
+
+  std::string Error;
+  raw_string_ostream ErrorOS(Error);
+  EXPECT_TRUE(verifyFunction(*F, &ErrorOS));
+  EXPECT_TRUE(
+      StringRef(Error).starts_with("GEP address space doesn't match type"))
+      << Error;
+}
+
 } // end anonymous namespace
 } // end namespace llvm
diff --git a/llvm/unittests/SandboxIR/PassTest.cpp b/llvm/unittests/SandboxIR/PassTest.cpp
index 866bd8233d8035d..751aedefd8fe2d5 100644
--- a/llvm/unittests/SandboxIR/PassTest.cpp
+++ b/llvm/unittests/SandboxIR/PassTest.cpp
@@ -46,7 +46,7 @@ define void @foo() {
 
   public:
     TestPass(unsigned &BBCnt) : FunctionPass("test-pass"), BBCnt(BBCnt) {}
-    bool runOnFunction(Function &F) final {
+    bool runOnFunction(Function &F, const Analyses &A) final {
       for ([[maybe_unused]] auto &BB : F)
         ++BBCnt;
       return false;
@@ -59,7 +59,7 @@ define void @foo() {
   // Check classof().
   EXPECT_TRUE(llvm::isa<FunctionPass>(TPass));
   // Check runOnFunction();
-  TPass.runOnFunction(*F);
+  TPass.runOnFunction(*F, Analyses::emptyForTesting());
   EXPECT_EQ(BBCnt, 1u);
 #ifndef NDEBUG
   {
@@ -80,7 +80,7 @@ define void @foo() {
   class TestNamePass final : public FunctionPass {
   public:
     TestNamePass(llvm::StringRef Name) : FunctionPass(Name) {}
-    bool runOnFunction(Function &F) { return false; }
+    bool runOnFunction(Function &F, const Analyses &A) { return false; }
   };
   EXPECT_DEATH(TestNamePass("white space"), ".*whitespace.*");
   EXPECT_DEATH(TestNamePass("-dash"), ".*start with.*");
@@ -106,7 +106,7 @@ define i8 @foo(i8 %v0, i8 %v1) {
   public:
     TestPass(unsigned &InstCount)
         : RegionPass("test-pass"), InstCount(InstCount) {}
-    bool runOnRegion(Region &R) final {
+    bool runOnRegion(Region &R, const Analyses &A) final {
       for ([[maybe_unused]] auto &Inst : R) {
         ++InstCount;
       }
@@ -121,7 +121,7 @@ define i8 @foo(i8 %v0, i8 %v1) {
   llvm::SmallVector<std::unique_ptr<Region>> Regions =
       Region::createRegionsFromMD(*F);
   ASSERT_EQ(Regions.size(), 1u);
-  TPass.runOnRegion(*Regions[0]);
+  TPass.runOnRegion(*Regions[0], Analyses::emptyForTesting());
   EXPECT_EQ(InstCount, 2u);
 #ifndef NDEBUG
   {
@@ -142,7 +142,7 @@ define i8 @foo(i8 %v0, i8 %v1) {
   class TestNamePass final : public RegionPass {
   public:
     TestNamePass(llvm::StringRef Name) : RegionPass(Name) {}
-    bool runOnRegion(Region &F) { return false; }
+    bool runOnRegion(Region &F, const Analyses &A) { return false; }
   };
   EXPECT_DEATH(TestNamePass("white space"), ".*whitespace.*");
   EXPECT_DEATH(TestNamePass("-dash"), ".*start with.*");
@@ -161,7 +161,7 @@ define void @foo() {
 
   public:
     TestPass1(unsigned &BBCnt) : FunctionPass("test-pass1"), BBCnt(BBCnt) {}
-    bool runOnFunction(Function &F) final {
+    bool runOnFunction(Function &F, const Analyses &A) final {
       for ([[maybe_unused]] auto &BB : F)
         ++BBCnt;
       return false;
@@ -172,7 +172,7 @@ define void @foo() {
 
   public:
     TestPass2(unsigned &BBCnt) : FunctionPass("test-pass2"), BBCnt(BBCnt) {}
-    bool runOnFunction(Function &F) final {
+    bool runOnFunction(Function &F, const Analyses &A) final {
       for ([[maybe_unused]] auto &BB : F)
         ++BBCnt;
       return false;
@@ -185,7 +185,7 @@ define void @foo() {
   FPM.addPass(std::make_unique<TestPass1>(BBCnt1));
   FPM.addPass(std::make_unique<TestPass2>(BBCnt2));
   // Check runOnFunction().
-  FPM.runOnFunction(*F);
+  FPM.runOnFunction(*F, Analyses::emptyForTesting());
   EXPECT_EQ(BBCnt1, 1u);
   EXPECT_EQ(BBCnt2, 1u);
 #ifndef NDEBUG
@@ -216,7 +216,7 @@ define i8 @foo(i8 %v0, i8 %v1) {
   public:
     TestPass1(unsigned &InstCount)
         : RegionPass("test-pass1"), InstCount(InstCount) {}
-    bool runOnRegion(Region &R) final {
+    bool runOnRegion(Region &R, const Analyses &A) final {
       for ([[maybe_unused]] auto &Inst : R)
         ++InstCount;
       return false;
@@ -228,7 +228,7 @@ define i8 @foo(i8 %v0, i8 %v1) {
   public:
     TestPass2(unsigned &InstCount)
         : RegionPass("test-pass2"), InstCount(InstCount) {}
-    bool runOnRegion(Region &R) final {
+    bool runOnRegion(Region &R, const Analyses &A) final {
       for ([[maybe_unused]] auto &Inst : R)
         ++InstCount;
       return false;
@@ -244,7 +244,7 @@ define i8 @foo(i8 %v0, i8 %v1) {
   llvm::SmallVector<std::unique_ptr<Region>> Regions =
       Region::createRegionsFromMD(*F);
   ASSERT_EQ(Regions.size(), 1u);
-  RPM.runOnRegion(*Regions[0]);
+  RPM.runOnRegion(*Regions[0], Analyses::emptyForTesting());
   EXPECT_EQ(InstCount1, 2u);
   EXPECT_EQ(InstCount2, 2u);
 #ifndef NDEBUG
@@ -270,7 +270,7 @@ define void @f() {
   public:
     FooPass(std::string &Str, llvm::StringRef Args)
         : FunctionPass("foo-pass"), Str(Str), Args(Args.str()) {}
-    bool runOnFunction(Function &F) final {
+    bool runOnFunction(Function &F, const Analyses &A) final {
       Str += "foo<" + Args + ">";
       return false;
     }
@@ -282,7 +282,7 @@ define void @f() {
   public:
     BarPass(std::string &Str, llvm::StringRef Args)
         : FunctionPass("bar-pass"), Str(Str), Args(Args.str()) {}
-    bool runOnFunction(Function &F) final {
+    bool runOnFunction(Function &F, const Analyses &A) final {
       Str += "bar<" + Args + ">";
       return false;
     }
@@ -302,7 +302,7 @@ define void @f() {
   FunctionPassManager FPM("test-fpm");
   FPM.setPassPipeline("foo<abc>,bar<nested1<nested2<nested3>>>,foo",
                       CreatePass);
-  FPM.runOnFunction(*F);
+  FPM.runOnFunction(*F, Analyses::emptyForTesting());
   EXPECT_EQ(Str, "foo<abc>bar<nested1<nested2<nested3>>>foo<>");
 
   // A second call to setPassPipeline will trigger an assertion in debug mode.
diff --git a/llvm/unittests/SandboxIR/SandboxIRTest.cpp b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
index 97113b303f72e5e..874c32c2d4398ff 100644
--- a/llvm/unittests/SandboxIR/SandboxIRTest.cpp
+++ b/llvm/unittests/SandboxIR/SandboxIRTest.cpp
@@ -21,7 +21,7 @@
 #include "llvm/SandboxIR/Utils.h"
 #include "llvm/SandboxIR/Value.h"
 #include "llvm/Support/SourceMgr.h"
-#include "gmock/gmock-matchers.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
@@ -5962,3 +5962,100 @@ TEST_F(SandboxIRTest, CheckClassof) {
   EXPECT_NE(&sandboxir::CLASS::classof, &sandboxir::Instruction::classof);
 #include "llvm/SandboxIR/Values.def"
 }
+
+TEST_F(SandboxIRTest, InstructionCallbacks) {
+  parseIR(C, R"IR(
+    define void @foo(ptr %ptr, i8 %val) {
+      ret void
+    }
+  )IR");
+  Function &LLVMF = *M->getFunction("foo");
+  sandboxir::Context Ctx(C);
+
+  auto &F = *Ctx.createFunction(&LLVMF);
+  auto &BB = *F.begin();
+  sandboxir::Argument *Ptr = F.getArg(0);
+  sandboxir::Argument *Val = F.getArg(1);
+  sandboxir::Instruction *Ret = &BB.front();
+
+  SmallVector<sandboxir::Instruction *> Inserted;
+  auto InsertCbId = Ctx.registerCreateInstrCallback(
+      [&Inserted](sandboxir::Instruction *I) { Inserted.push_back(I); });
+
+  SmallVector<sandboxir::Instruction *> Removed;
+  auto RemoveCbId = Ctx.registerEraseInstrCallback(
+      [&Removed](sandboxir::Instruction *I) { Removed.push_back(I); });
+
+  // Keep the moved instruction and the instruction pointed to by the Where
+  // iterator so we can check both callback arguments work as expected.
+  SmallVector<std::pair<sandboxir::Instruction *, sandboxir::Instruction *>>
+      Moved;
+  auto MoveCbId = Ctx.registerMoveInstrCallback(
+      [&Moved](sandboxir::Instruction *I, const sandboxir::BBIterator &Where) {
+        // Use a nullptr to signal "move to end" to keep it simple. We only
+        // have a basic block in this test case anyway.
+        if (Where == Where.getNodeParent()->end())
+          Moved.push_back(std::make_pair(I, nullptr));
+        else
+          Moved.push_back(std::make_pair(I, &*Where));
+      });
+
+  // Two more insertion callbacks, to check that they're called in registration
+  // order.
+  SmallVector<int> Order;
+  auto CheckOrderInsertCbId1 = Ctx.registerCreateInstrCallback(
+      [&Order](sandboxir::Instruction *I) { Order.push_back(1); });
+
+  auto CheckOrderInsertCbId2 = Ctx.registerCreateInstrCallback(
+      [&Order](sandboxir::Instruction *I) { Order.push_back(2); });
+
+  Ctx.save();
+  auto *NewI = sandboxir::StoreInst::create(Val, Ptr, /*Align=*/std::nullopt,
+                                            Ret->getIterator(), Ctx);
+  EXPECT_THAT(Inserted, testing::ElementsAre(NewI));
+  EXPECT_THAT(Removed, testing::IsEmpty());
+  EXPECT_THAT(Moved, testing::IsEmpty());
+  EXPECT_THAT(Order, testing::ElementsAre(1, 2));
+
+  Ret->moveBefore(NewI);
+  EXPECT_THAT(Inserted, testing::ElementsAre(NewI));
+  EXPECT_THAT(Removed, testing::IsEmpty());
+  EXPECT_THAT(Moved, testing::ElementsAre(std::make_pair(Ret, NewI)));
+
+  Ret->eraseFromParent();
+  EXPECT_THAT(Inserted, testing::ElementsAre(NewI));
+  EXPECT_THAT(Removed, testing::ElementsAre(Ret));
+  EXPECT_THAT(Moved, testing::ElementsAre(std::make_pair(Ret, NewI)));
+
+  NewI->eraseFromParent();
+  EXPECT_THAT(Inserted, testing::ElementsAre(NewI));
+  EXPECT_THAT(Removed, testing::ElementsAre(Ret, NewI));
+  EXPECT_THAT(Moved, testing::ElementsAre(std::make_pair(Ret, NewI)));
+
+  // Check that after revert the callbacks have been called for the inverse
+  // operations of the changes made so far.
+  Ctx.revert();
+  EXPECT_THAT(Inserted, testing::ElementsAre(NewI, NewI, Ret));
+  EXPECT_THAT(Removed, testing::ElementsAre(Ret, NewI, NewI));
+  EXPECT_THAT(Moved, testing::ElementsAre(std::make_pair(Ret, NewI),
+                                          std::make_pair(Ret, nullptr)));
+  EXPECT_THAT(Order, testing::ElementsAre(1, 2, 1, 2, 1, 2));
+
+  // Check that deregistration works. Do an operation of each type after
+  // deregistering callbacks and check.
+  Inserted.clear();
+  Removed.clear();
+  Moved.clear();
+  Ctx.unregisterCreateInstrCallback(InsertCbId);
+  Ctx.unregisterEraseInstrCallback(RemoveCbId);
+  Ctx.unregisterMoveInstrCallback(MoveCbId);
+  Ctx.unregisterCreateInstrCallback(CheckOrderInsertCbId1);
+  Ctx.unregisterCreateInstrCallback(CheckOrderInsertCbId2);
+  auto *NewI2 = sandboxir::StoreInst::create(Val, Ptr, /*Align=*/std::nullopt,
+                                             Ret->getIterator(), Ctx);
+  Ret->moveBefore(NewI2);
+  Ret->eraseFromParent();
+  EXPECT_THAT(Inserted, testing::IsEmpty());
+  EXPECT_THAT(Removed, testing::IsEmpty());
+  EXPECT_THAT(Moved, testing::IsEmpty());
+}
diff --git a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
index ded43a4ff7875ad..30f80601d96cbba 100644
--- a/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
+++ b/llvm/unittests/TargetParser/RISCVISAInfoTest.cpp
@@ -643,60 +643,22 @@ TEST(ParseArchString, MissingDepency) {
               "'zvl*b' requires 'v' or 'zve*' extension to also be specified");
   }
 
-  for (StringRef Input : {"rv32i_zvbb"}) {
+  // These all have an implication relationship, thus should pass
+  for (StringRef Input : {
+           "rv32i_zvbb",
+           "rv32i_zvbc32e0p7",
+           "rv32i_zvbc",
+           "rv32i_zvkb",
+           "rv32i_zvkg",
+           "rv32i_zvkgs0p7",
+           "rv32i_zvkned",
+           "rv32i_zvknha",
+           "rv32i_zvksed",
+           "rv32i_zvksh",
+           "rv32i_zvknhb",
+       }) {
     EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
-              "'zvbb' requires 'v' or 'zve*' extension to also be specified");
-  }
-
-  for (StringRef Input : {"rv32i_zvbc32e0p7"}) {
-    EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
-              "'zvbc32e' requires 'v' or 'zve*' extension to also be specified");
-  }
-
-  for (StringRef Input : {"rv32i_zvbc"}) {
-    EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
-              "'zvbc' requires 'v' or 'zve64*' extension to also be specified");
-  }
-
-  for (StringRef Input : {"rv32i_zvkb"}) {
-    EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
-              "'zvkb' requires 'v' or 'zve*' extension to also be specified");
-  }
-
-  for (StringRef Input : {"rv32i_zvkg"}) {
-    EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
-              "'zvkg' requires 'v' or 'zve*' extension to also be specified");
-  }
-
-  for (StringRef Input : {"rv32i_zvkgs0p7"}) {
-    EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
-              "'zvkg' requires 'v' or 'zve*' extension to also be specified");
-  }
-
-  for (StringRef Input : {"rv32i_zvkned"}) {
-    EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
-              "'zvkned' requires 'v' or 'zve*' extension to also be specified");
-  }
-
-  for (StringRef Input : {"rv32i_zvknha"}) {
-    EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
-              "'zvknha' requires 'v' or 'zve*' extension to also be specified");
-  }
-
-  for (StringRef Input : {"rv32i_zvksed"}) {
-    EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
-              "'zvksed' requires 'v' or 'zve*' extension to also be specified");
-  }
-
-  for (StringRef Input : {"rv32i_zvksh"}) {
-    EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
-              "'zvksh' requires 'v' or 'zve*' extension to also be specified");
-  }
-
-  for (StringRef Input : {"rv32i_zvknhb"}) {
-    EXPECT_EQ(
-        toString(RISCVISAInfo::parseArchString(Input, true).takeError()),
-        "'zvknhb' requires 'v' or 'zve64*' extension to also be specified");
+              "");
   }
 
   for (StringRef Input : {"rv32i_zacas1p0"}) {
@@ -774,8 +736,8 @@ TEST(ParseArchString,
 TEST(ParseArchString,
      RejectsExperimentalProfilesIfEnableExperimentalExtensionsNotSet) {
   EXPECT_EQ(
-      toString(RISCVISAInfo::parseArchString("rva23u64", false).takeError()),
-      "requires '-menable-experimental-extensions' for profile 'rva23u64'");
+      toString(RISCVISAInfo::parseArchString("rvm23u32", false).takeError()),
+      "requires '-menable-experimental-extensions' for profile 'rvm23u32'");
 }
 
 TEST(ToFeatures, IIsDroppedAndExperimentalExtensionsArePrefixed) {
@@ -1055,6 +1017,7 @@ R"(All available -march extensions for RISC-V
     zvl8192b             1.0
     zhinx                1.0
     zhinxmin             1.0
+    sha                  1.0
     shcounterenw         1.0
     shgatpa              1.0
     shtvala              1.0
@@ -1065,6 +1028,8 @@ R"(All available -march extensions for RISC-V
     smcdeleg             1.0
     smcsrind             1.0
     smepmp               1.0
+    smmpm                1.0
+    smnpm                1.0
     smrnmi               1.0
     smstateen            1.0
     ssaia                1.0
@@ -1073,6 +1038,8 @@ R"(All available -march extensions for RISC-V
     sscofpmf             1.0
     sscounterenw         1.0
     sscsrind             1.0
+    ssnpm                1.0
+    sspm                 1.0
     ssqosid              1.0
     ssstateen            1.0
     ssstrict             1.0
@@ -1080,12 +1047,14 @@ R"(All available -march extensions for RISC-V
     sstvala              1.0
     sstvecd              1.0
     ssu64xl              1.0
+    supm                 1.0
     svade                1.0
     svadu                1.0
     svbare               1.0
     svinval              1.0
     svnapot              1.0
     svpbmt               1.0
+    svvptc               1.0
     xcvalu               1.0
     xcvbi                1.0
     xcvbitmanip          1.0
@@ -1122,26 +1091,21 @@ Experimental extensions
     zvbc32e              0.7
     zvkgs                0.7
     smctr                1.0
-    smmpm                1.0
-    smnpm                1.0
     ssctr                1.0
-    ssnpm                1.0
-    sspm                 1.0
-    supm                 1.0
 
 Supported Profiles
     rva20s64
     rva20u64
     rva22s64
     rva22u64
-    rvi20u32
-    rvi20u64
-
-Experimental Profiles
     rva23s64
     rva23u64
     rvb23s64
     rvb23u64
+    rvi20u32
+    rvi20u64
+
+Experimental Profiles
     rvm23u32
 
 Use -march to specify the target's extension.
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
index 24512cb0225e8ee..df689767b772457 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/CMakeLists.txt
@@ -13,4 +13,5 @@ add_llvm_unittest(SandboxVectorizerTests
   LegalityTest.cpp
   SchedulerTest.cpp
   SeedCollectorTest.cpp	
+  VecUtilsTest.cpp
 )
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
index 76e5a5ce5aed920..50b78f6f48afdf7 100644
--- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp
@@ -29,13 +29,21 @@ struct LegalityTest : public testing::Test {
 
 TEST_F(LegalityTest, Legality) {
   parseIR(C, R"IR(
-define void @foo(ptr %ptr) {
+define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float %farg0, float %farg1, i64 %v0, i64 %v1) {
   %gep0 = getelementptr float, ptr %ptr, i32 0
   %gep1 = getelementptr float, ptr %ptr, i32 1
+  %gep3 = getelementptr float, ptr %ptr, i32 3
   %ld0 = load float, ptr %gep0
   %ld1 = load float, ptr %gep0
   store float %ld0, ptr %gep0
   store float %ld1, ptr %gep1
+  store <2 x float> %vec2, ptr %gep1
+  store <3 x float> %vec3, ptr %gep3
+  store i8 %arg, ptr %gep1
+  %fadd0 = fadd float %farg0, %farg0
+  %fadd1 = fadd fast float %farg1, %farg1
+  %trunc0 = trunc nuw nsw i64 %v0 to i8
+  %trunc1 = trunc nsw i64 %v1 to i8
   ret void
 }
 )IR");
@@ -46,14 +54,61 @@ define void @foo(ptr %ptr) {
   auto It = BB->begin();
   [[maybe_unused]] auto *Gep0 = cast<sandboxir::GetElementPtrInst>(&*It++);
   [[maybe_unused]] auto *Gep1 = cast<sandboxir::GetElementPtrInst>(&*It++);
+  [[maybe_unused]] auto *Gep3 = cast<sandboxir::GetElementPtrInst>(&*It++);
   [[maybe_unused]] auto *Ld0 = cast<sandboxir::LoadInst>(&*It++);
   [[maybe_unused]] auto *Ld1 = cast<sandboxir::LoadInst>(&*It++);
   auto *St0 = cast<sandboxir::StoreInst>(&*It++);
   auto *St1 = cast<sandboxir::StoreInst>(&*It++);
+  auto *StVec2 = cast<sandboxir::StoreInst>(&*It++);
+  auto *StVec3 = cast<sandboxir::StoreInst>(&*It++);
+  auto *StI8 = cast<sandboxir::StoreInst>(&*It++);
+  auto *FAdd0 = cast<sandboxir::BinaryOperator>(&*It++);
+  auto *FAdd1 = cast<sandboxir::BinaryOperator>(&*It++);
+  auto *Trunc0 = cast<sandboxir::TruncInst>(&*It++);
+  auto *Trunc1 = cast<sandboxir::TruncInst>(&*It++);
 
   sandboxir::LegalityAnalysis Legality;
-  auto Result = Legality.canVectorize({St0, St1});
+  const auto &Result = Legality.canVectorize({St0, St1});
   EXPECT_TRUE(isa<sandboxir::Widen>(Result));
+
+  {
+    // Check NotInstructions
+    auto &Result = Legality.canVectorize({F, St0});
+    EXPECT_TRUE(isa<sandboxir::Pack>(Result));
+    EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
+              sandboxir::ResultReason::NotInstructions);
+  }
+  {
+    // Check DiffOpcodes
+    const auto &Result = Legality.canVectorize({St0, Ld0});
+    EXPECT_TRUE(isa<sandboxir::Pack>(Result));
+    EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
+              sandboxir::ResultReason::DiffOpcodes);
+  }
+  {
+    // Check DiffTypes
+    EXPECT_TRUE(isa<sandboxir::Widen>(Legality.canVectorize({St0, StVec2})));
+    EXPECT_TRUE(isa<sandboxir::Widen>(Legality.canVectorize({StVec2, StVec3})));
+
+    const auto &Result = Legality.canVectorize({St0, StI8});
+    EXPECT_TRUE(isa<sandboxir::Pack>(Result));
+    EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
+              sandboxir::ResultReason::DiffTypes);
+  }
+  {
+    // Check DiffMathFlags
+    const auto &Result = Legality.canVectorize({FAdd0, FAdd1});
+    EXPECT_TRUE(isa<sandboxir::Pack>(Result));
+    EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
+              sandboxir::ResultReason::DiffMathFlags);
+  }
+  {
+    // Check DiffWrapFlags
+    const auto &Result = Legality.canVectorize({Trunc0, Trunc1});
+    EXPECT_TRUE(isa<sandboxir::Pack>(Result));
+    EXPECT_EQ(cast<sandboxir::Pack>(Result).getReason(),
+              sandboxir::ResultReason::DiffWrapFlags);
+  }
 }
 
 #ifndef NDEBUG
@@ -68,11 +123,20 @@ TEST_F(LegalityTest, LegalityResultDump) {
   sandboxir::LegalityAnalysis Legality;
   EXPECT_TRUE(
       Matches(Legality.createLegalityResult<sandboxir::Widen>(), "Widen"));
+  EXPECT_TRUE(Matches(Legality.createLegalityResult<sandboxir::Pack>(
+                          sandboxir::ResultReason::NotInstructions),
+                      "Pack Reason: NotInstructions"));
   EXPECT_TRUE(Matches(Legality.createLegalityResult<sandboxir::Pack>(
                           sandboxir::ResultReason::DiffOpcodes),
                       "Pack Reason: DiffOpcodes"));
   EXPECT_TRUE(Matches(Legality.createLegalityResult<sandboxir::Pack>(
                           sandboxir::ResultReason::DiffTypes),
                       "Pack Reason: DiffTypes"));
+  EXPECT_TRUE(Matches(Legality.createLegalityResult<sandboxir::Pack>(
+                          sandboxir::ResultReason::DiffMathFlags),
+                      "Pack Reason: DiffMathFlags"));
+  EXPECT_TRUE(Matches(Legality.createLegalityResult<sandboxir::Pack>(
+                          sandboxir::ResultReason::DiffWrapFlags),
+                      "Pack Reason: DiffWrapFlags"));
 }
 #endif // NDEBUG
diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
new file mode 100644
index 000000000000000..e0b082849643925
--- /dev/null
+++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp
@@ -0,0 +1,37 @@
+//===- VecUtilsTest.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/SandboxIR/Context.h"
+#include "llvm/SandboxIR/Type.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+struct VecUtilsTest : public testing::Test {
+  LLVMContext C;
+};
+
+TEST_F(VecUtilsTest, GetNumElements) {
+  sandboxir::Context Ctx(C);
+  auto *ElemTy = sandboxir::Type::getInt32Ty(Ctx);
+  EXPECT_EQ(sandboxir::VecUtils::getNumElements(ElemTy), 1);
+  auto *VTy = sandboxir::FixedVectorType::get(ElemTy, 2);
+  EXPECT_EQ(sandboxir::VecUtils::getNumElements(VTy), 2);
+  auto *VTy1 = sandboxir::FixedVectorType::get(ElemTy, 1);
+  EXPECT_EQ(sandboxir::VecUtils::getNumElements(VTy1), 1);
+}
+
+TEST_F(VecUtilsTest, GetElementType) {
+  sandboxir::Context Ctx(C);
+  auto *ElemTy = sandboxir::Type::getInt32Ty(Ctx);
+  EXPECT_EQ(sandboxir::VecUtils::getElementType(ElemTy), ElemTy);
+  auto *VTy = sandboxir::FixedVectorType::get(ElemTy, 2);
+  EXPECT_EQ(sandboxir::VecUtils::getElementType(VTy), ElemTy);
+}
diff --git a/llvm/utils/TableGen/ARMTargetDefEmitter.cpp b/llvm/utils/TableGen/ARMTargetDefEmitter.cpp
index 6b8ebf96cdf383e..792d047139466cf 100644
--- a/llvm/utils/TableGen/ARMTargetDefEmitter.cpp
+++ b/llvm/utils/TableGen/ARMTargetDefEmitter.cpp
@@ -25,19 +25,19 @@
 using namespace llvm;
 
 /// Collect the full set of implied features for a SubtargetFeature.
-static void CollectImpliedFeatures(std::set<const Record *> &SeenFeats,
+static void collectImpliedFeatures(std::set<const Record *> &SeenFeats,
                                    const Record *Rec) {
   assert(Rec->isSubClassOf("SubtargetFeature") &&
          "Rec is not a SubtargetFeature");
 
   SeenFeats.insert(Rec);
   for (const Record *Implied : Rec->getValueAsListOfDefs("Implies"))
-    CollectImpliedFeatures(SeenFeats, Implied);
+    collectImpliedFeatures(SeenFeats, Implied);
 }
 
-static void CheckFeatureTree(const Record *Root) {
+static void checkFeatureTree(const Record *Root) {
   std::set<const Record *> SeenFeats;
-  CollectImpliedFeatures(SeenFeats, Root);
+  collectImpliedFeatures(SeenFeats, Root);
 
   // Check that each of the mandatory (implied) features which is an
   // ExtensionWithMArch is also enabled by default.
@@ -53,12 +53,12 @@ static void CheckFeatureTree(const Record *Root) {
   }
 }
 
-static void EmitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) {
+static void emitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) {
   OS << "// Autogenerated by ARMTargetDefEmitter.cpp\n\n";
 
   // Look through all SubtargetFeature defs with the given FieldName, and
   // collect the set of all Values that that FieldName is set to.
-  auto gatherSubtargetFeatureFieldValues = [&RK](StringRef FieldName) {
+  auto GatherSubtargetFeatureFieldValues = [&RK](StringRef FieldName) {
     llvm::StringSet<> Set;
     for (const Record *Rec : RK.getAllDerivedDefinitions("SubtargetFeature")) {
       if (Rec->getValueAsString("FieldName") == FieldName) {
@@ -88,7 +88,7 @@ static void EmitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) {
      << "#define ARM_PROCESSOR_FAMILY(ENUM)\n"
      << "#endif\n\n";
   const StringSet<> ARMProcFamilyVals =
-      gatherSubtargetFeatureFieldValues("ARMProcFamily");
+      GatherSubtargetFeatureFieldValues("ARMProcFamily");
   for (const StringRef &Family : ARMProcFamilyVals.keys())
     OS << "ARM_PROCESSOR_FAMILY(" << Family << ")\n";
   OS << "\n#undef ARM_PROCESSOR_FAMILY\n\n";
@@ -97,7 +97,7 @@ static void EmitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) {
      << "#define ARM_ARCHITECTURE(ENUM)\n"
      << "#endif\n\n";
   // This should correspond to instances of the Architecture tablegen class.
-  const StringSet<> ARMArchVals = gatherSubtargetFeatureFieldValues("ARMArch");
+  const StringSet<> ARMArchVals = GatherSubtargetFeatureFieldValues("ARMArch");
   for (const StringRef &Arch : ARMArchVals.keys())
     OS << "ARM_ARCHITECTURE(" << Arch << ")\n";
   OS << "\n#undef ARM_ARCHITECTURE\n\n";
@@ -315,7 +315,7 @@ static void EmitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) {
     auto Profile = Arch->getValueAsString("Profile");
     auto ArchInfo = ArchInfoName(Major, Minor, Profile);
 
-    CheckFeatureTree(Arch);
+    checkFeatureTree(Arch);
 
     OS << "  {\n"
        << "    \"" << Name << "\",\n"
@@ -343,5 +343,5 @@ static void EmitARMTargetDef(const RecordKeeper &RK, raw_ostream &OS) {
 }
 
 static TableGen::Emitter::Opt
-    X("gen-arm-target-def", EmitARMTargetDef,
+    X("gen-arm-target-def", emitARMTargetDef,
       "Generate the ARM or AArch64 Architecture information header.");
diff --git a/llvm/utils/TableGen/Attributes.cpp b/llvm/utils/TableGen/Attributes.cpp
index 138275356dc9546..66ba25c6dcc87d4 100644
--- a/llvm/utils/TableGen/Attributes.cpp
+++ b/llvm/utils/TableGen/Attributes.cpp
@@ -9,7 +9,6 @@
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
-#include <vector>
 using namespace llvm;
 
 #define DEBUG_TYPE "attr-enum"
diff --git a/llvm/utils/TableGen/CTagsEmitter.cpp b/llvm/utils/TableGen/CTagsEmitter.cpp
index 3718486ff7ad4e6..413d8f5dbcff074 100644
--- a/llvm/utils/TableGen/CTagsEmitter.cpp
+++ b/llvm/utils/TableGen/CTagsEmitter.cpp
@@ -17,7 +17,7 @@
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
-#include <algorithm>
+#include <tuple>
 #include <vector>
 using namespace llvm;
 
diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp
index c8f263e15d96b76..de20303a5bfd208 100644
--- a/llvm/utils/TableGen/CallingConvEmitter.cpp
+++ b/llvm/utils/TableGen/CallingConvEmitter.cpp
@@ -35,12 +35,12 @@ class CallingConvEmitter {
 public:
   explicit CallingConvEmitter(const RecordKeeper &R) : Records(R) {}
 
-  void run(raw_ostream &o);
+  void run(raw_ostream &O);
 
 private:
-  void EmitCallingConv(const Record *CC, raw_ostream &O);
-  void EmitAction(const Record *Action, indent Indent, raw_ostream &O);
-  void EmitArgRegisterLists(raw_ostream &O);
+  void emitCallingConv(const Record *CC, raw_ostream &O);
+  void emitAction(const Record *Action, indent Indent, raw_ostream &O);
+  void emitArgRegisterLists(raw_ostream &O);
 };
 } // End anonymous namespace
 
@@ -75,16 +75,16 @@ void CallingConvEmitter::run(raw_ostream &O) {
   Records.getTimer().startTimer("Emit full descriptions");
   for (const Record *CC : CCs) {
     if (!CC->getValueAsBit("Custom")) {
-      EmitCallingConv(CC, O);
+      emitCallingConv(CC, O);
     }
   }
 
-  EmitArgRegisterLists(O);
+  emitArgRegisterLists(O);
 
   O << "\n#endif // CC_REGISTER_LIST\n";
 }
 
-void CallingConvEmitter::EmitCallingConv(const Record *CC, raw_ostream &O) {
+void CallingConvEmitter::emitCallingConv(const Record *CC, raw_ostream &O) {
   const ListInit *CCActions = CC->getValueAsListInit("Actions");
   Counter = 0;
 
@@ -107,8 +107,8 @@ void CallingConvEmitter::EmitCallingConv(const Record *CC, raw_ostream &O) {
     << std::string(Pad, ' ') << "MVT LocVT, CCValAssign::LocInfo LocInfo,\n"
     << std::string(Pad, ' ') << "ISD::ArgFlagsTy ArgFlags, CCState &State) {\n";
   // Emit all of the actions, in order.
-  for (unsigned i = 0, e = CCActions->size(); i != e; ++i) {
-    const Record *Action = CCActions->getElementAsRecord(i);
+  for (unsigned I = 0, E = CCActions->size(); I != E; ++I) {
+    const Record *Action = CCActions->getElementAsRecord(I);
     SwiftAction =
         llvm::any_of(Action->getSuperClasses(),
                      [](const std::pair<const Record *, SMRange> &Class) {
@@ -117,23 +117,23 @@ void CallingConvEmitter::EmitCallingConv(const Record *CC, raw_ostream &O) {
                      });
 
     O << "\n";
-    EmitAction(Action, indent(2), O);
+    emitAction(Action, indent(2), O);
   }
 
   O << "\n  return true; // CC didn't match.\n";
   O << "}\n";
 }
 
-void CallingConvEmitter::EmitAction(const Record *Action, indent Indent,
+void CallingConvEmitter::emitAction(const Record *Action, indent Indent,
                                     raw_ostream &O) {
   if (Action->isSubClassOf("CCPredicateAction")) {
     O << Indent << "if (";
 
     if (Action->isSubClassOf("CCIfType")) {
       const ListInit *VTs = Action->getValueAsListInit("VTs");
-      for (unsigned i = 0, e = VTs->size(); i != e; ++i) {
-        const Record *VT = VTs->getElementAsRecord(i);
-        if (i != 0)
+      for (unsigned I = 0, E = VTs->size(); I != E; ++I) {
+        const Record *VT = VTs->getElementAsRecord(I);
+        if (I != 0)
           O << " ||\n    " << Indent;
         O << "LocVT == " << getEnumName(getValueType(VT));
       }
@@ -146,7 +146,7 @@ void CallingConvEmitter::EmitAction(const Record *Action, indent Indent,
     }
 
     O << ") {\n";
-    EmitAction(Action->getValueAsDef("SubAction"), Indent + 2, O);
+    emitAction(Action->getValueAsDef("SubAction"), Indent + 2, O);
     O << Indent << "}\n";
   } else {
     if (Action->isSubClassOf("CCDelegateTo")) {
@@ -171,8 +171,8 @@ void CallingConvEmitter::EmitAction(const Record *Action, indent Indent,
           << "[] = {\n";
         O << Indent << "  ";
         ListSeparator LS;
-        for (unsigned i = 0, e = RegList->size(); i != e; ++i) {
-          std::string Name = getQualifiedName(RegList->getElementAsRecord(i));
+        for (unsigned I = 0, E = RegList->size(); I != E; ++I) {
+          std::string Name = getQualifiedName(RegList->getElementAsRecord(I));
           if (SwiftAction)
             AssignedSwiftRegsMap[CurrentAction].insert(Name);
           else
@@ -230,16 +230,16 @@ void CallingConvEmitter::EmitAction(const Record *Action, indent Indent,
           << "[] = {\n";
         O << Indent << "  ";
         ListSeparator LS;
-        for (unsigned i = 0, e = RegList->size(); i != e; ++i)
-          O << LS << getQualifiedName(RegList->getElementAsRecord(i));
+        for (unsigned I = 0, E = RegList->size(); I != E; ++I)
+          O << LS << getQualifiedName(RegList->getElementAsRecord(I));
         O << "\n" << Indent << "};\n";
 
         O << Indent << "static const MCPhysReg RegList" << ShadowRegListNumber
           << "[] = {\n";
         O << Indent << "  ";
         ListSeparator LSS;
-        for (unsigned i = 0, e = ShadowRegList->size(); i != e; ++i)
-          O << LSS << getQualifiedName(ShadowRegList->getElementAsRecord(i));
+        for (unsigned I = 0, E = ShadowRegList->size(); I != E; ++I)
+          O << LSS << getQualifiedName(ShadowRegList->getElementAsRecord(I));
         O << "\n" << Indent << "};\n";
 
         O << Indent << "if (MCRegister Reg = State.AllocateReg(RegList"
@@ -287,8 +287,8 @@ void CallingConvEmitter::EmitAction(const Record *Action, indent Indent,
         << ShadowRegListNumber << "[] = {\n";
       O << Indent << "  ";
       ListSeparator LS;
-      for (unsigned i = 0, e = ShadowRegList->size(); i != e; ++i)
-        O << LS << getQualifiedName(ShadowRegList->getElementAsRecord(i));
+      for (unsigned I = 0, E = ShadowRegList->size(); I != E; ++I)
+        O << LS << getQualifiedName(ShadowRegList->getElementAsRecord(I));
       O << "\n" << Indent << "};\n";
 
       O << Indent << "int64_t Offset" << ++Counter << " = State.AllocateStack("
@@ -357,7 +357,7 @@ void CallingConvEmitter::EmitAction(const Record *Action, indent Indent,
   }
 }
 
-void CallingConvEmitter::EmitArgRegisterLists(raw_ostream &O) {
+void CallingConvEmitter::emitArgRegisterLists(raw_ostream &O) {
   // Transitively merge all delegated CCs into AssignedRegsMap.
   using EntryTy = std::pair<std::string, std::set<std::string>>;
   bool Redo;
diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp
index be822c4815289cd..407ee81b7e0b6c3 100644
--- a/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -52,10 +52,10 @@ class CodeEmitterGen {
 public:
   CodeEmitterGen(const RecordKeeper &R) : Records(R) {}
 
-  void run(raw_ostream &o);
+  void run(raw_ostream &O);
 
 private:
-  int getVariableBit(const std::string &VarName, const BitsInit *BI, int bit);
+  int getVariableBit(const std::string &VarName, const BitsInit *BI, int Bit);
   std::pair<std::string, std::string>
   getInstructionCases(const Record *R, const CodeGenTarget &Target);
   void addInstructionCasesForEncoding(const Record *R,
@@ -69,10 +69,10 @@ class CodeEmitterGen {
                                const CodeGenTarget &Target);
 
   void emitInstructionBaseValues(
-      raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
+      raw_ostream &O, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
       const CodeGenTarget &Target, unsigned HwMode = DefaultMode);
   void
-  emitCaseMap(raw_ostream &o,
+  emitCaseMap(raw_ostream &O,
               const std::map<std::string, std::vector<std::string>> &CaseMap);
   unsigned BitWidth = 0u;
   bool UseAPInt = false;
@@ -81,12 +81,12 @@ class CodeEmitterGen {
 // If the VarBitInit at position 'bit' matches the specified variable then
 // return the variable bit position.  Otherwise return -1.
 int CodeEmitterGen::getVariableBit(const std::string &VarName,
-                                   const BitsInit *BI, int bit) {
-  if (const VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {
+                                   const BitsInit *BI, int Bit) {
+  if (const VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(Bit))) {
     if (const VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))
       if (VI->getName() == VarName)
         return VBI->getBitNum();
-  } else if (const VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {
+  } else if (const VarInit *VI = dyn_cast<VarInit>(BI->getBit(Bit))) {
     if (VI->getName() == VarName)
       return 0;
   }
@@ -104,19 +104,19 @@ bool CodeEmitterGen::addCodeToMergeInOperand(const Record *R,
   CodeGenInstruction &CGI = Target.getInstruction(R);
 
   // Determine if VarName actually contributes to the Inst encoding.
-  int bit = BI->getNumBits() - 1;
+  int Bit = BI->getNumBits() - 1;
 
   // Scan for a bit that this contributed to.
-  for (; bit >= 0;) {
-    if (getVariableBit(VarName, BI, bit) != -1)
+  for (; Bit >= 0;) {
+    if (getVariableBit(VarName, BI, Bit) != -1)
       break;
 
-    --bit;
+    --Bit;
   }
 
   // If we found no bits, ignore this value, otherwise emit the call to get the
   // operand encoding.
-  if (bit < 0)
+  if (Bit < 0)
     return true;
 
   // If the operand matches by name, reference according to that
@@ -175,97 +175,97 @@ bool CodeEmitterGen::addCodeToMergeInOperand(const Record *R,
   // Precalculate the number of lits this variable contributes to in the
   // operand. If there is a single lit (consecutive range of bits) we can use a
   // destructive sequence on APInt that reduces memory allocations.
-  int numOperandLits = 0;
-  for (int tmpBit = bit; tmpBit >= 0;) {
-    int varBit = getVariableBit(VarName, BI, tmpBit);
+  int NumOperandLits = 0;
+  for (int TmpBit = Bit; TmpBit >= 0;) {
+    int VarBit = getVariableBit(VarName, BI, TmpBit);
 
     // If this bit isn't from a variable, skip it.
-    if (varBit == -1) {
-      --tmpBit;
+    if (VarBit == -1) {
+      --TmpBit;
       continue;
     }
 
     // Figure out the consecutive range of bits covered by this operand, in
     // order to generate better encoding code.
-    int beginVarBit = varBit;
+    int BeginVarBit = VarBit;
     int N = 1;
-    for (--tmpBit; tmpBit >= 0;) {
-      varBit = getVariableBit(VarName, BI, tmpBit);
-      if (varBit == -1 || varBit != (beginVarBit - N))
+    for (--TmpBit; TmpBit >= 0;) {
+      VarBit = getVariableBit(VarName, BI, TmpBit);
+      if (VarBit == -1 || VarBit != (BeginVarBit - N))
         break;
       ++N;
-      --tmpBit;
+      --TmpBit;
     }
-    ++numOperandLits;
+    ++NumOperandLits;
   }
 
   unsigned BitOffset = -1;
-  for (; bit >= 0;) {
-    int varBit = getVariableBit(VarName, BI, bit);
+  for (; Bit >= 0;) {
+    int VarBit = getVariableBit(VarName, BI, Bit);
 
     // If this bit isn't from a variable, skip it.
-    if (varBit == -1) {
-      --bit;
+    if (VarBit == -1) {
+      --Bit;
       continue;
     }
 
     // Figure out the consecutive range of bits covered by this operand, in
     // order to generate better encoding code.
-    int beginInstBit = bit;
-    int beginVarBit = varBit;
+    int BeginInstBit = Bit;
+    int BeginVarBit = VarBit;
     int N = 1;
-    for (--bit; bit >= 0;) {
-      varBit = getVariableBit(VarName, BI, bit);
-      if (varBit == -1 || varBit != (beginVarBit - N))
+    for (--Bit; Bit >= 0;) {
+      VarBit = getVariableBit(VarName, BI, Bit);
+      if (VarBit == -1 || VarBit != (BeginVarBit - N))
         break;
       ++N;
-      --bit;
+      --Bit;
     }
 
-    std::string maskStr;
-    int opShift;
+    std::string MaskStr;
+    int OpShift;
 
-    unsigned loBit = beginVarBit - N + 1;
-    unsigned hiBit = loBit + N;
-    unsigned loInstBit = beginInstBit - N + 1;
-    BitOffset = loInstBit;
+    unsigned LoBit = BeginVarBit - N + 1;
+    unsigned HiBit = LoBit + N;
+    unsigned LoInstBit = BeginInstBit - N + 1;
+    BitOffset = LoInstBit;
     if (UseAPInt) {
-      std::string extractStr;
+      std::string ExtractStr;
       if (N >= 64) {
-        extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " +
-                     itostr(loBit) + ")";
-        Case += "      Value.insertBits(" + extractStr + ", " +
-                itostr(loInstBit) + ");\n";
+        ExtractStr = "op.extractBits(" + itostr(HiBit - LoBit) + ", " +
+                     itostr(LoBit) + ")";
+        Case += "      Value.insertBits(" + ExtractStr + ", " +
+                itostr(LoInstBit) + ");\n";
       } else {
-        extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) +
-                     ", " + itostr(loBit) + ")";
-        Case += "      Value.insertBits(" + extractStr + ", " +
-                itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n";
+        ExtractStr = "op.extractBitsAsZExtValue(" + itostr(HiBit - LoBit) +
+                     ", " + itostr(LoBit) + ")";
+        Case += "      Value.insertBits(" + ExtractStr + ", " +
+                itostr(LoInstBit) + ", " + itostr(HiBit - LoBit) + ");\n";
       }
     } else {
-      uint64_t opMask = ~(uint64_t)0 >> (64 - N);
-      opShift = beginVarBit - N + 1;
-      opMask <<= opShift;
-      maskStr = "UINT64_C(" + utostr(opMask) + ")";
-      opShift = beginInstBit - beginVarBit;
-
-      if (numOperandLits == 1) {
-        Case += "      op &= " + maskStr + ";\n";
-        if (opShift > 0) {
-          Case += "      op <<= " + itostr(opShift) + ";\n";
-        } else if (opShift < 0) {
-          Case += "      op >>= " + itostr(-opShift) + ";\n";
+      uint64_t OpMask = ~(uint64_t)0 >> (64 - N);
+      OpShift = BeginVarBit - N + 1;
+      OpMask <<= OpShift;
+      MaskStr = "UINT64_C(" + utostr(OpMask) + ")";
+      OpShift = BeginInstBit - BeginVarBit;
+
+      if (NumOperandLits == 1) {
+        Case += "      op &= " + MaskStr + ";\n";
+        if (OpShift > 0) {
+          Case += "      op <<= " + itostr(OpShift) + ";\n";
+        } else if (OpShift < 0) {
+          Case += "      op >>= " + itostr(-OpShift) + ";\n";
         }
         Case += "      Value |= op;\n";
       } else {
-        if (opShift > 0) {
-          Case += "      Value |= (op & " + maskStr + ") << " +
-                  itostr(opShift) + ";\n";
-        } else if (opShift < 0) {
-          Case += "      Value |= (op & " + maskStr + ") >> " +
-                  itostr(-opShift) + ";\n";
+        if (OpShift > 0) {
+          Case += "      Value |= (op & " + MaskStr + ") << " +
+                  itostr(OpShift) + ";\n";
+        } else if (OpShift < 0) {
+          Case += "      Value |= (op & " + MaskStr + ") >> " +
+                  itostr(-OpShift) + ";\n";
         } else {
-          Case += "      Value |= (op & " + maskStr + ");\n";
+          Case += "      Value |= (op & " + MaskStr + ");\n";
         }
       }
     }
@@ -285,7 +285,7 @@ CodeEmitterGen::getInstructionCases(const Record *R,
                                     const CodeGenTarget &Target) {
   std::string Case, BitOffsetCase;
 
-  auto append = [&](const std::string &S) {
+  auto Append = [&](const std::string &S) {
     Case += S;
     BitOffsetCase += S;
   };
@@ -298,7 +298,7 @@ CodeEmitterGen::getInstructionCases(const Record *R,
       // Invoke the interface to obtain the HwMode ID controlling the
       // EncodingInfo for the current subtarget. This interface will
       // mask off irrelevant HwMode IDs.
-      append("      unsigned HwMode = "
+      Append("      unsigned HwMode = "
              "STI.getHwMode(MCSubtargetInfo::HwMode_EncodingInfo);\n");
       Case += "      switch (HwMode) {\n";
       Case += "      default: llvm_unreachable(\"Unknown hardware mode!\"); "
@@ -328,16 +328,16 @@ CodeEmitterGen::getInstructionCases(const Record *R,
         Case += "      Value = InstBitsByHw[opcode];\n";
       }
 
-      append("      switch (HwMode) {\n");
-      append("      default: llvm_unreachable(\"Unhandled HwMode\");\n");
+      Append("      switch (HwMode) {\n");
+      Append("      default: llvm_unreachable(\"Unhandled HwMode\");\n");
       for (auto &[ModeId, Encoding] : EBM) {
-        append("      case " + itostr(ModeId) + ": {\n");
+        Append("      case " + itostr(ModeId) + ": {\n");
         addInstructionCasesForEncoding(R, Encoding, Target, Case,
                                        BitOffsetCase);
-        append("      break;\n");
-        append("      }\n");
+        Append("      break;\n");
+        Append("      }\n");
       }
-      append("      }\n");
+      Append("      }\n");
       return std::pair(std::move(Case), std::move(BitOffsetCase));
     }
   }
@@ -397,13 +397,13 @@ static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
 }
 
 void CodeEmitterGen::emitInstructionBaseValues(
-    raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
+    raw_ostream &O, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
     const CodeGenTarget &Target, unsigned HwMode) {
   const CodeGenHwModes &HWM = Target.getHwModes();
   if (HwMode == DefaultMode)
-    o << "  static const uint64_t InstBits[] = {\n";
+    O << "  static const uint64_t InstBits[] = {\n";
   else
-    o << "  static const uint64_t InstBits_"
+    O << "  static const uint64_t InstBits_"
       << HWM.getModeName(HwMode, /*IncludeDefault=*/true) << "[] = {\n";
 
   for (const CodeGenInstruction *CGI : NumberedInstructions) {
@@ -411,9 +411,9 @@ void CodeEmitterGen::emitInstructionBaseValues(
 
     if (R->getValueAsString("Namespace") == "TargetOpcode" ||
         R->getValueAsBit("isPseudo")) {
-      o << "    ";
-      emitInstBits(o, APInt(BitWidth, 0));
-      o << ",\n";
+      O << "    ";
+      emitInstBits(O, APInt(BitWidth, 0));
+      O << ",\n";
       continue;
     }
 
@@ -427,9 +427,9 @@ void CodeEmitterGen::emitInstructionBaseValues(
           // If the HwMode does not match, then Encoding '0'
           // should be generated.
           APInt Value(BitWidth, 0);
-          o << "    ";
-          emitInstBits(o, Value);
-          o << "," << '\t' << "// " << R->getName() << "\n";
+          O << "    ";
+          emitInstBits(O, Value);
+          O << "," << '\t' << "// " << R->getName() << "\n";
           continue;
         }
       }
@@ -438,37 +438,37 @@ void CodeEmitterGen::emitInstructionBaseValues(
 
     // Start by filling in fixed values.
     APInt Value(BitWidth, 0);
-    for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
-      if (const auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue())
-        Value.setBit(i);
+    for (unsigned I = 0, E = BI->getNumBits(); I != E; ++I) {
+      if (const auto *B = dyn_cast<BitInit>(BI->getBit(I)); B && B->getValue())
+        Value.setBit(I);
     }
-    o << "    ";
-    emitInstBits(o, Value);
-    o << "," << '\t' << "// " << R->getName() << "\n";
+    O << "    ";
+    emitInstBits(O, Value);
+    O << "," << '\t' << "// " << R->getName() << "\n";
   }
-  o << "    UINT64_C(0)\n  };\n";
+  O << "    UINT64_C(0)\n  };\n";
 }
 
 void CodeEmitterGen::emitCaseMap(
-    raw_ostream &o,
+    raw_ostream &O,
     const std::map<std::string, std::vector<std::string>> &CaseMap) {
   for (const auto &[Case, InstList] : CaseMap) {
     bool First = true;
     for (const auto &Inst : InstList) {
       if (!First)
-        o << "\n";
-      o << "    case " << Inst << ":";
+        O << "\n";
+      O << "    case " << Inst << ":";
       First = false;
     }
-    o << " {\n";
-    o << Case;
-    o << "      break;\n"
+    O << " {\n";
+    O << Case;
+    O << "      break;\n"
       << "    }\n";
   }
 }
 
-void CodeEmitterGen::run(raw_ostream &o) {
-  emitSourceFileHeader("Machine Code Emitter", o);
+void CodeEmitterGen::run(raw_ostream &O) {
+  emitSourceFileHeader("Machine Code Emitter", O);
 
   CodeGenTarget Target(Records);
 
@@ -479,7 +479,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
       Target.getInstructionsByEnumValue();
 
   if (Target.hasVariableLengthEncodings()) {
-    emitVarLenCodeEmitter(Records, o);
+    emitVarLenCodeEmitter(Records, O);
   } else {
     const CodeGenHwModes &HWM = Target.getHwModes();
     // The set of HwModes used by instruction encodings.
@@ -509,31 +509,31 @@ void CodeEmitterGen::run(raw_ostream &o) {
 
     // Emit function declaration
     if (UseAPInt) {
-      o << "void " << Target.getName()
+      O << "void " << Target.getName()
         << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
         << "    SmallVectorImpl<MCFixup> &Fixups,\n"
         << "    APInt &Inst,\n"
         << "    APInt &Scratch,\n"
         << "    const MCSubtargetInfo &STI) const {\n";
     } else {
-      o << "uint64_t " << Target.getName();
-      o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
+      O << "uint64_t " << Target.getName();
+      O << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
         << "    SmallVectorImpl<MCFixup> &Fixups,\n"
         << "    const MCSubtargetInfo &STI) const {\n";
     }
 
     // Emit instruction base values
-    emitInstructionBaseValues(o, NumberedInstructions, Target, DefaultMode);
+    emitInstructionBaseValues(O, NumberedInstructions, Target, DefaultMode);
     if (!HwModes.empty()) {
       // Emit table for instrs whose encodings are controlled by HwModes.
       for (unsigned HwMode : HwModes) {
         if (HwMode == DefaultMode)
           continue;
-        emitInstructionBaseValues(o, NumberedInstructions, Target, HwMode);
+        emitInstructionBaseValues(O, NumberedInstructions, Target, HwMode);
       }
 
       // This pointer will be assigned to the HwMode table later.
-      o << "  const uint64_t *InstBitsByHw;\n";
+      O << "  const uint64_t *InstBitsByHw;\n";
     }
 
     // Map to accumulate all the cases.
@@ -557,7 +557,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
     // Emit initial function code
     if (UseAPInt) {
       int NumWords = APInt::getNumWords(BitWidth);
-      o << "  const unsigned opcode = MI.getOpcode();\n"
+      O << "  const unsigned opcode = MI.getOpcode();\n"
         << "  if (Scratch.getBitWidth() != " << BitWidth << ")\n"
         << "    Scratch = Scratch.zext(" << BitWidth << ");\n"
         << "  Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * "
@@ -566,7 +566,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
         << "  APInt &op = Scratch;\n"
         << "  switch (opcode) {\n";
     } else {
-      o << "  const unsigned opcode = MI.getOpcode();\n"
+      O << "  const unsigned opcode = MI.getOpcode();\n"
         << "  uint64_t Value = InstBits[opcode];\n"
         << "  uint64_t op = 0;\n"
         << "  (void)op;  // suppress warning\n"
@@ -574,30 +574,30 @@ void CodeEmitterGen::run(raw_ostream &o) {
     }
 
     // Emit each case statement
-    emitCaseMap(o, CaseMap);
+    emitCaseMap(O, CaseMap);
 
     // Default case: unhandled opcode
-    o << "  default:\n"
+    O << "  default:\n"
       << "    std::string msg;\n"
       << "    raw_string_ostream Msg(msg);\n"
       << "    Msg << \"Not supported instr: \" << MI;\n"
       << "    report_fatal_error(Msg.str().c_str());\n"
       << "  }\n";
     if (UseAPInt)
-      o << "  Inst = Value;\n";
+      O << "  Inst = Value;\n";
     else
-      o << "  return Value;\n";
-    o << "}\n\n";
+      O << "  return Value;\n";
+    O << "}\n\n";
 
-    o << "#ifdef GET_OPERAND_BIT_OFFSET\n"
+    O << "#ifdef GET_OPERAND_BIT_OFFSET\n"
       << "#undef GET_OPERAND_BIT_OFFSET\n\n"
       << "uint32_t " << Target.getName()
       << "MCCodeEmitter::getOperandBitOffset(const MCInst &MI,\n"
       << "    unsigned OpNum,\n"
       << "    const MCSubtargetInfo &STI) const {\n"
       << "  switch (MI.getOpcode()) {\n";
-    emitCaseMap(o, BitOffsetCaseMap);
-    o << "  }\n"
+    emitCaseMap(O, BitOffsetCaseMap);
+    O << "  }\n"
       << "  std::string msg;\n"
       << "  raw_string_ostream Msg(msg);\n"
       << "  Msg << \"Not supported instr[opcode]: \" << MI << \"[\" << OpNum "
diff --git a/llvm/utils/TableGen/CodeGenMapTable.cpp b/llvm/utils/TableGen/CodeGenMapTable.cpp
index 7876db6f33dfdff..8d22c0013dda881 100644
--- a/llvm/utils/TableGen/CodeGenMapTable.cpp
+++ b/llvm/utils/TableGen/CodeGenMapTable.cpp
@@ -258,12 +258,12 @@ bool MapTableEmitter::isKeyColInstr(const Record *CurInstr) {
 
   // Check if the instruction is a KeyCol instruction.
   bool MatchFound = true;
-  for (unsigned j = 0, endCF = ColFields->size(); (j < endCF) && MatchFound;
-       j++) {
+  for (unsigned J = 0, EndCf = ColFields->size(); (J < EndCf) && MatchFound;
+       J++) {
     const RecordVal *ColFieldName =
-        CurInstr->getValue(ColFields->getElement(j));
+        CurInstr->getValue(ColFields->getElement(J));
     std::string CurInstrVal = ColFieldName->getValue()->getAsUnquotedString();
-    std::string KeyColValue = KeyCol->getElement(j)->getAsUnquotedString();
+    std::string KeyColValue = KeyCol->getElement(J)->getAsUnquotedString();
     MatchFound = CurInstrVal == KeyColValue;
   }
   return MatchFound;
@@ -318,12 +318,12 @@ const Record *MapTableEmitter::getInstrForColumn(const Record *KeyInstr,
 
   for (const Record *CurInstr : RelatedInstrVec) {
     bool MatchFound = true;
-    for (unsigned j = 0, endCF = ColFields->size(); (j < endCF) && MatchFound;
-         j++) {
-      const Init *ColFieldJ = ColFields->getElement(j);
+    for (unsigned J = 0, EndCf = ColFields->size(); (J < EndCf) && MatchFound;
+         J++) {
+      const Init *ColFieldJ = ColFields->getElement(J);
       const Init *CurInstrInit = CurInstr->getValue(ColFieldJ)->getValue();
       std::string CurInstrVal = CurInstrInit->getAsUnquotedString();
-      const Init *ColFieldJVallue = CurValueCol->getElement(j);
+      const Init *ColFieldJVallue = CurValueCol->getElement(J);
       MatchFound = CurInstrVal == ColFieldJVallue->getAsUnquotedString();
     }
 
@@ -368,19 +368,19 @@ unsigned MapTableEmitter::emitBinSearchTable(raw_ostream &OS) {
   // Number of columns in the table are NumCol+1 because key instructions are
   // emitted as first column.
   OS << "Table[][" << NumCol + 1 << "] = {\n";
-  for (unsigned i = 0; i < TotalNumInstr; i++) {
-    const Record *CurInstr = NumberedInstructions[i]->TheDef;
+  for (unsigned I = 0; I < TotalNumInstr; I++) {
+    const Record *CurInstr = NumberedInstructions[I]->TheDef;
     ArrayRef<const Record *> ColInstrs = MapTable[CurInstr];
     std::string OutStr;
     unsigned RelExists = 0;
     if (!ColInstrs.empty()) {
-      for (unsigned j = 0; j < NumCol; j++) {
-        if (ColInstrs[j] != nullptr) {
+      for (unsigned J = 0; J < NumCol; J++) {
+        if (ColInstrs[J] != nullptr) {
           RelExists = 1;
           OutStr += ", ";
           OutStr += Namespace;
           OutStr += "::";
-          OutStr += ColInstrs[j]->getName();
+          OutStr += ColInstrs[J]->getName();
         } else {
           OutStr += ", (uint16_t)-1U";
         }
@@ -441,20 +441,20 @@ void MapTableEmitter::emitMapFuncBody(raw_ostream &OS, unsigned TableSize) {
   emitBinSearch(OS, TableSize);
 
   if (ValueCols.size() > 1) {
-    for (unsigned i = 0, e = ValueCols.size(); i < e; i++) {
-      const ListInit *ColumnI = ValueCols[i];
+    for (unsigned I = 0, E = ValueCols.size(); I < E; I++) {
+      const ListInit *ColumnI = ValueCols[I];
       OS << "  if (";
-      for (unsigned j = 0, ColSize = ColumnI->size(); j < ColSize; ++j) {
-        std::string ColName = ColFields->getElement(j)->getAsUnquotedString();
+      for (unsigned J = 0, ColSize = ColumnI->size(); J < ColSize; ++J) {
+        std::string ColName = ColFields->getElement(J)->getAsUnquotedString();
         OS << "in" << ColName;
         OS << " == ";
-        OS << ColName << "_" << ColumnI->getElement(j)->getAsUnquotedString();
-        if (j < ColumnI->size() - 1)
+        OS << ColName << "_" << ColumnI->getElement(J)->getAsUnquotedString();
+        if (J < ColumnI->size() - 1)
           OS << " && ";
       }
       OS << ")\n";
       OS << "    return " << InstrMapDesc.getName();
-      OS << "Table[mid][" << i + 1 << "];\n";
+      OS << "Table[mid][" << I + 1 << "];\n";
     }
     OS << "  return -1;";
   } else
@@ -509,8 +509,8 @@ static void emitEnums(raw_ostream &OS, const RecordKeeper &Records) {
     std::vector<const ListInit *> ValueCols;
     unsigned ListSize = List->size();
 
-    for (unsigned j = 0; j < ListSize; j++) {
-      const auto *ListJ = cast<ListInit>(List->getElement(j));
+    for (unsigned J = 0; J < ListSize; J++) {
+      const auto *ListJ = cast<ListInit>(List->getElement(J));
 
       if (ListJ->size() != ColFields->size())
         PrintFatalError("Record `" + CurMap->getName() +
@@ -520,10 +520,10 @@ static void emitEnums(raw_ostream &OS, const RecordKeeper &Records) {
       ValueCols.push_back(ListJ);
     }
 
-    for (unsigned j = 0, endCF = ColFields->size(); j < endCF; j++) {
-      for (unsigned k = 0; k < ListSize; k++) {
-        std::string ColName = ColFields->getElement(j)->getAsUnquotedString();
-        ColFieldValueMap[ColName].push_back((ValueCols[k])->getElement(j));
+    for (unsigned J = 0, EndCf = ColFields->size(); J < EndCf; J++) {
+      for (unsigned K = 0; K < ListSize; K++) {
+        std::string ColName = ColFields->getElement(J)->getAsUnquotedString();
+        ColFieldValueMap[ColName].push_back((ValueCols[K])->getElement(J));
       }
     }
   }
diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
index d2228c902a56b47..f17c62dd1fd9d42 100644
--- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
@@ -813,8 +813,8 @@ void TypeInfer::expandOverloads(TypeSetByHwMode &VTS) const {
 
 void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out,
                                 const TypeSetByHwMode::SetType &Legal) const {
-  if (Out.count(MVT::iPTRAny)) {
-    Out.erase(MVT::iPTRAny);
+  if (Out.count(MVT::pAny)) {
+    Out.erase(MVT::pAny);
     Out.insert(MVT::iPTR);
   } else if (Out.count(MVT::iAny)) {
     Out.erase(MVT::iAny);
@@ -2461,7 +2461,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
       ValueTypeByHwMode VVT = TP.getInfer().getConcrete(Types[0], false);
       for (auto &P : VVT) {
         MVT::SimpleValueType VT = P.second.SimpleTy;
-        if (VT == MVT::iPTR || VT == MVT::iPTRAny)
+        // Can only check for types of a known size
+        if (VT == MVT::iPTR)
           continue;
         unsigned Size = MVT(VT).getFixedSizeInBits();
         // Make sure that the value is representable for this type.
diff --git a/llvm/utils/TableGen/Common/CodeGenTarget.cpp b/llvm/utils/TableGen/Common/CodeGenTarget.cpp
index b358518c4290b0f..4e75db689a0b57a 100644
--- a/llvm/utils/TableGen/Common/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenTarget.cpp
@@ -47,19 +47,6 @@ MVT::SimpleValueType llvm::getValueType(const Record *Rec) {
   return (MVT::SimpleValueType)Rec->getValueAsInt("Value");
 }
 
-StringRef llvm::getName(MVT::SimpleValueType T) {
-  switch (T) {
-  case MVT::Other:
-    return "UNKNOWN";
-  case MVT::iPTR:
-    return "TLI.getPointerTy()";
-  case MVT::iPTRAny:
-    return "TLI.getPointerTy()";
-  default:
-    return getEnumName(T);
-  }
-}
-
 StringRef llvm::getEnumName(MVT::SimpleValueType T) {
   // clang-format off
   switch (T) {
diff --git a/llvm/utils/TableGen/Common/CodeGenTarget.h b/llvm/utils/TableGen/Common/CodeGenTarget.h
index c7b44f7028eb5bb..8bcb2f677a00b0b 100644
--- a/llvm/utils/TableGen/Common/CodeGenTarget.h
+++ b/llvm/utils/TableGen/Common/CodeGenTarget.h
@@ -46,7 +46,6 @@ class CodeGenSubRegIndex;
 /// record corresponds to.
 MVT::SimpleValueType getValueType(const Record *Rec);
 
-StringRef getName(MVT::SimpleValueType T);
 StringRef getEnumName(MVT::SimpleValueType T);
 
 /// getQualifiedName - Return the name of the specified record, with a
diff --git a/llvm/utils/TableGen/DAGISelEmitter.cpp b/llvm/utils/TableGen/DAGISelEmitter.cpp
index d3b653b0fba27fa..3d39ee148373fd0 100644
--- a/llvm/utils/TableGen/DAGISelEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelEmitter.cpp
@@ -55,8 +55,8 @@ static unsigned getResultPatternCost(TreePatternNode &P,
     if (II.usesCustomInserter)
       Cost += 10;
   }
-  for (unsigned i = 0, e = P.getNumChildren(); i != e; ++i)
-    Cost += getResultPatternCost(P.getChild(i), CGP);
+  for (unsigned I = 0, E = P.getNumChildren(); I != E; ++I)
+    Cost += getResultPatternCost(P.getChild(I), CGP);
   return Cost;
 }
 
@@ -72,8 +72,8 @@ static unsigned getResultPatternSize(TreePatternNode &P,
   if (Op->isSubClassOf("Instruction")) {
     Cost += Op->getValueAsInt("CodeSize");
   }
-  for (unsigned i = 0, e = P.getNumChildren(); i != e; ++i)
-    Cost += getResultPatternSize(P.getChild(i), CGP);
+  for (unsigned I = 0, E = P.getNumChildren(); I != E; ++I)
+    Cost += getResultPatternSize(P.getChild(I), CGP);
   return Cost;
 }
 
diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
index 537bee55978bd6c..a6c0d09f69ba342 100644
--- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -105,7 +105,7 @@ int DFAPacketizerEmitter::collectAllFuncUnits(
   for (const CodeGenProcModel *Model : ProcModels)
     ProcItinList.insert(Model->ItinsDef);
 
-  int totalFUs = 0;
+  int TotalFUs = 0;
   // Parse functional units for all the itineraries.
   for (const Record *Proc : ProcItinList) {
     std::vector<const Record *> FUs = Proc->getValueAsListOfDefs("FU");
@@ -123,10 +123,10 @@ int DFAPacketizerEmitter::collectAllFuncUnits(
       LLVM_DEBUG(dbgs() << " " << FUs[j]->getName() << ":0x"
                         << Twine::utohexstr(FuncResources));
     }
-    totalFUs += numFUs;
+    TotalFUs += numFUs;
     LLVM_DEBUG(dbgs() << "\n");
   }
-  return totalFUs;
+  return TotalFUs;
 }
 
 int DFAPacketizerEmitter::collectAllComboFuncs(
@@ -136,19 +136,19 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
   LLVM_DEBUG(dbgs() << "collectAllComboFuncs");
   LLVM_DEBUG(dbgs() << " (" << ComboFuncList.size() << " sets)\n");
 
-  int numCombos = 0;
-  for (unsigned i = 0, N = ComboFuncList.size(); i < N; ++i) {
-    const Record *Func = ComboFuncList[i];
+  int NumCombos = 0;
+  for (unsigned I = 0, N = ComboFuncList.size(); I < N; ++I) {
+    const Record *Func = ComboFuncList[I];
     std::vector<const Record *> FUs = Func->getValueAsListOfDefs("CFD");
 
-    LLVM_DEBUG(dbgs() << "    CFD:" << i << " (" << FUs.size() << " combo FUs) "
+    LLVM_DEBUG(dbgs() << "    CFD:" << I << " (" << FUs.size() << " combo FUs) "
                       << Func->getName() << "\n");
 
     // Convert macros to bits for each stage.
-    for (unsigned j = 0, N = FUs.size(); j < N; ++j) {
-      assert((j < DFA_MAX_RESOURCES) &&
+    for (unsigned J = 0, N = FUs.size(); J < N; ++J) {
+      assert((J < DFA_MAX_RESOURCES) &&
              "Exceeded maximum number of DFA resources");
-      const Record *FuncData = FUs[j];
+      const Record *FuncData = FUs[J];
       const Record *ComboFunc = FuncData->getValueAsDef("TheComboFunc");
       const std::vector<const Record *> FuncList =
           FuncData->getValueAsListOfDefs("FuncList");
@@ -165,13 +165,13 @@ int DFAPacketizerEmitter::collectAllComboFuncs(
         ComboResources |= FuncResources;
       }
       ComboBitToBitsMap[ComboBit] = ComboResources;
-      numCombos++;
+      NumCombos++;
       LLVM_DEBUG(dbgs() << "          => combo bits: " << ComboFuncName << ":0x"
                         << Twine::utohexstr(ComboBit) << " = 0x"
                         << Twine::utohexstr(ComboResources) << "\n");
     }
   }
-  return numCombos;
+  return NumCombos;
 }
 
 ResourceVector
@@ -271,7 +271,7 @@ void DFAPacketizerEmitter::emitForItineraries(
 
   // Given a resource state, return all resource states by applying
   // InsnClass.
-  auto applyInsnClass = [&](const ResourceVector &InsnClass,
+  auto ApplyInsnClass = [&](const ResourceVector &InsnClass,
                             NfaStateTy State) -> std::deque<NfaStateTy> {
     std::deque<NfaStateTy> V(1, State);
     // Apply every stage in the class individually.
@@ -304,7 +304,7 @@ void DFAPacketizerEmitter::emitForItineraries(
 
   // Given a resource state, return a quick (conservative) guess as to whether
   // InsnClass can be applied. This is a filter for the more heavyweight
-  // applyInsnClass.
+  // ApplyInsnClass.
   auto canApplyInsnClass = [](const ResourceVector &InsnClass,
                               NfaStateTy State) -> bool {
     for (NfaStateTy Resources : InsnClass) {
@@ -325,7 +325,7 @@ void DFAPacketizerEmitter::emitForItineraries(
       if (!canApplyInsnClass(Resources, State))
         continue;
       unsigned ResourcesID = UniqueResources.idFor(Resources);
-      for (uint64_t NewState : applyInsnClass(Resources, State)) {
+      for (uint64_t NewState : ApplyInsnClass(Resources, State)) {
         if (SeenStates.emplace(NewState).second)
           Worklist.emplace_back(NewState);
         Emitter.addTransition(State, NewState, ResourcesID);
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 0598baea9be7a23..8bebe608eece47d 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -14,7 +14,6 @@
 #include "Basic/SequenceToOffsetTable.h"
 #include "Common/CodeGenTarget.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSet.h"
@@ -33,6 +32,20 @@ using namespace llvm::dxil;
 
 namespace {
 
+struct DXILArgSelect {
+  enum class Type {
+    Index,
+    I32,
+    I8,
+  };
+  Type Type = Type::Index;
+  int Value = -1;
+};
+struct DXILIntrinsicSelect {
+  StringRef Intrinsic;
+  SmallVector<DXILArgSelect, 4> Args;
+};
+
 struct DXILOperationDesc {
   std::string OpName; // name of DXIL operation
   int OpCode;         // ID of DXIL operation
@@ -43,13 +56,12 @@ struct DXILOperationDesc {
   SmallVector<const Record *> OverloadRecs;
   SmallVector<const Record *> StageRecs;
   SmallVector<const Record *> AttrRecs;
-  StringRef Intrinsic; // The llvm intrinsic map to OpName. Default is "" which
-                       // means no map exists
+  SmallVector<DXILIntrinsicSelect> IntrinsicSelects;
   SmallVector<StringRef, 4>
       ShaderStages; // shader stages to which this applies, empty for all.
   int OverloadParamIndex;             // Index of parameter with overload type.
                                       //   -1 : no overload types
-  SmallVector<StringRef, 4> counters; // counters for this inst.
+  SmallVector<StringRef, 4> Counters; // counters for this inst.
   DXILOperationDesc(const Record *);
 };
 } // end anonymous namespace
@@ -57,7 +69,7 @@ struct DXILOperationDesc {
 /// In-place sort TableGen records of class with a field
 ///    Version dxil_version
 /// in the ascending version order.
-static void AscendingSortByVersion(std::vector<const Record *> &Recs) {
+static void ascendingSortByVersion(std::vector<const Record *> &Recs) {
   sort(Recs, [](const Record *RecA, const Record *RecB) {
     unsigned RecAMaj =
         RecA->getValueAsDef("dxil_version")->getValueAsInt("Major");
@@ -72,6 +84,21 @@ static void AscendingSortByVersion(std::vector<const Record *> &Recs) {
   });
 }
 
+/// Take a `int_{intrinsic_name}` and return just the intrinsic_name part if
+/// available. Otherwise return the empty string.
+static StringRef GetIntrinsicName(const RecordVal *RV) {
+  if (RV && RV->getValue()) {
+    if (const DefInit *DI = dyn_cast<DefInit>(RV->getValue())) {
+      auto *IntrinsicDef = DI->getDef();
+      auto DefName = IntrinsicDef->getName();
+      assert(DefName.starts_with("int_") && "invalid intrinsic name");
+      // Remove the int_ from intrinsic name.
+      return DefName.substr(4);
+    }
+  }
+  return "";
+}
+
 /// Construct an object using the DXIL Operation records specified
 /// in DXIL.td. This serves as the single source of reference of
 /// the information extracted from the specified Record R, for
@@ -98,8 +125,8 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
   // the comment before the definition of class LLVMMatchType in
   // llvm/IR/Intrinsics.td
   OverloadParamIndex = -1; // A sigil meaning none.
-  for (unsigned i = 0; i < ParamTypeRecsSize; i++) {
-    const Record *TR = ParamTypeRecs[i];
+  for (unsigned I = 0; I < ParamTypeRecsSize; I++) {
+    const Record *TR = ParamTypeRecs[I];
     // Track operation parameter indices of any overload types
     if (TR->getValueAsInt("isOverload")) {
       if (OverloadParamIndex != -1) {
@@ -110,7 +137,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
       // Keep the earliest parameter index we see, but if it was the return type
       // overwrite it with the first overloaded argument.
       if (OverloadParamIndex <= 0)
-        OverloadParamIndex = i;
+        OverloadParamIndex = I;
     }
     OpTypes.emplace_back(TR);
   }
@@ -119,7 +146,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
   std::vector<const Record *> Recs = R->getValueAsListOfDefs("overloads");
 
   // Sort records in ascending order of DXIL version
-  AscendingSortByVersion(Recs);
+  ascendingSortByVersion(Recs);
 
   for (const Record *CR : Recs) {
     OverloadRecs.push_back(CR);
@@ -134,7 +161,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
   }
 
   // Sort records in ascending order of DXIL version
-  AscendingSortByVersion(Recs);
+  ascendingSortByVersion(Recs);
 
   for (const Record *CR : Recs) {
     StageRecs.push_back(CR);
@@ -144,7 +171,7 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
   Recs = R->getValueAsListOfDefs("attributes");
 
   // Sort records in ascending order of DXIL version
-  AscendingSortByVersion(Recs);
+  ascendingSortByVersion(Recs);
 
   for (const Record *CR : Recs) {
     AttrRecs.push_back(CR);
@@ -158,14 +185,63 @@ DXILOperationDesc::DXILOperationDesc(const Record *R) {
                            OpName);
   }
 
-  const RecordVal *RV = R->getValue("LLVMIntrinsic");
-  if (RV && RV->getValue()) {
-    if (const DefInit *DI = dyn_cast<DefInit>(RV->getValue())) {
-      auto *IntrinsicDef = DI->getDef();
-      auto DefName = IntrinsicDef->getName();
-      assert(DefName.starts_with("int_") && "invalid intrinsic name");
-      // Remove the int_ from intrinsic name.
-      Intrinsic = DefName.substr(4);
+  {
+    DXILIntrinsicSelect IntrSelect;
+    IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("LLVMIntrinsic"));
+    if (IntrSelect.Intrinsic.size())
+      IntrinsicSelects.emplace_back(std::move(IntrSelect));
+  }
+
+  auto IntrinsicSelectRecords = R->getValueAsListOfDefs("intrinsic_selects");
+  if (IntrinsicSelectRecords.size()) {
+    if (IntrinsicSelects.size()) {
+      PrintFatalError(
+          R, Twine("LLVMIntrinsic and intrinsic_selects cannot be both "
+                   "defined for DXIL operation - ") +
+                 OpName);
+    } else {
+      for (const Record *R : IntrinsicSelectRecords) {
+        DXILIntrinsicSelect IntrSelect;
+        IntrSelect.Intrinsic = GetIntrinsicName(R->getValue("intrinsic"));
+        auto Args = R->getValueAsListOfDefs("args");
+        for (const Record *Arg : Args) {
+          bool IsI8 = Arg->getValueAsBit("is_i8");
+          bool IsI32 = Arg->getValueAsBit("is_i32");
+          int Index = Arg->getValueAsInt("index");
+          const Record *ValueRec = Arg->getValueAsOptionalDef("value");
+
+          DXILArgSelect ArgSelect;
+          if (IsI8) {
+            if (!ValueRec) {
+              PrintFatalError(R, Twine("'value' must be defined for i8 "
+                                       "ArgSelect for DXIL operation - ") +
+                                     OpName);
+            }
+            ArgSelect.Type = DXILArgSelect::Type::I8;
+            ArgSelect.Value = ValueRec->getValueAsInt("value");
+          } else if (IsI32) {
+            if (!ValueRec) {
+              PrintFatalError(R, Twine("'value' must be defined for i32 "
+                                       "ArgSelect for DXIL operation - ") +
+                                     OpName);
+            }
+            ArgSelect.Type = DXILArgSelect::Type::I32;
+            ArgSelect.Value = ValueRec->getValueAsInt("value");
+          } else {
+            if (Index < 0) {
+              PrintFatalError(
+                  R, Twine("Index in ArgSelect<index> must be equal to or "
+                           "greater than 0 for DXIL operation - ") +
+                         OpName);
+            }
+            ArgSelect.Type = DXILArgSelect::Type::Index;
+            ArgSelect.Value = Index;
+          }
+
+          IntrSelect.Args.emplace_back(std::move(ArgSelect));
+        }
+        IntrinsicSelects.emplace_back(std::move(IntrSelect));
+      }
     }
   }
 }
@@ -210,7 +286,7 @@ static std::string getOverloadMaskString(ArrayRef<const Record *> Recs) {
   if (Recs.empty()) {
     MaskString.append("{{1, 0}, OverloadKind::UNDEFINED}}");
   } else {
-    for (auto Rec : Recs) {
+    for (const auto *Rec : Recs) {
       unsigned Major =
           Rec->getValueAsDef("dxil_version")->getValueAsInt("Major");
       unsigned Minor =
@@ -256,7 +332,7 @@ static std::string getStageMaskString(ArrayRef<const Record *> Recs) {
                     "operation must be specified");
   }
 
-  for (auto Rec : Recs) {
+  for (const auto *Rec : Recs) {
     unsigned Major = Rec->getValueAsDef("dxil_version")->getValueAsInt("Major");
     unsigned Minor = Rec->getValueAsDef("dxil_version")->getValueAsInt("Minor");
     MaskString.append(Prefix)
@@ -294,7 +370,7 @@ static std::string getAttributeMaskString(ArrayRef<const Record *> Recs) {
   std::string Prefix = "";
   MaskString.append("{");
 
-  for (auto Rec : Recs) {
+  for (const auto *Rec : Recs) {
     unsigned Major = Rec->getValueAsDef("dxil_version")->getValueAsInt("Major");
     unsigned Minor = Rec->getValueAsDef("dxil_version")->getValueAsInt("Minor");
     MaskString.append(Prefix)
@@ -378,10 +454,29 @@ static void emitDXILIntrinsicMap(ArrayRef<DXILOperationDesc> Ops,
   OS << "#ifdef DXIL_OP_INTRINSIC\n";
   OS << "\n";
   for (const auto &Op : Ops) {
-    if (Op.Intrinsic.empty())
+    if (Op.IntrinsicSelects.empty()) {
       continue;
-    OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
-       << ", Intrinsic::" << Op.Intrinsic << ")\n";
+    }
+    for (const DXILIntrinsicSelect &MappedIntr : Op.IntrinsicSelects) {
+      OS << "DXIL_OP_INTRINSIC(dxil::OpCode::" << Op.OpName
+         << ", Intrinsic::" << MappedIntr.Intrinsic;
+      for (const DXILArgSelect &ArgSelect : MappedIntr.Args) {
+        OS << ", (ArgSelect { ";
+        switch (ArgSelect.Type) {
+        case DXILArgSelect::Type::Index:
+          OS << "ArgSelect::Type::Index, ";
+          break;
+        case DXILArgSelect::Type::I8:
+          OS << "ArgSelect::Type::I8, ";
+          break;
+        case DXILArgSelect::Type::I32:
+          OS << "ArgSelect::Type::I32, ";
+          break;
+        }
+        OS << ArgSelect.Value << "})";
+      }
+      OS << ")\n";
+    }
   }
   OS << "\n";
   OS << "#undef DXIL_OP_INTRINSIC\n";
@@ -481,21 +576,21 @@ static void emitDXILOperationTableDataStructs(const RecordKeeper &Records,
   size_t ShaderKindCount = ShaderKindRecs.size();
   uint64_t ShaderKindTySz = PowerOf2Ceil(ShaderKindRecs.size() + 1);
   OS << "enum ShaderKind : uint" << ShaderKindTySz << "_t {\n";
-  const std::string allStages("all_stages");
-  const std::string removed("removed");
-  int shiftVal = 1;
-  for (auto R : ShaderKindRecs) {
+  const std::string AllStages("all_stages");
+  const std::string Removed("removed");
+  int ShiftVal = 1;
+  for (const auto *R : ShaderKindRecs) {
     auto Name = R->getName();
-    if (Name.compare(removed) == 0) {
+    if (Name.compare(Removed) == 0) {
       OS << "  " << Name
          << " =  0,  // Pseudo-stage indicating op not supported in any "
             "stage\n";
-    } else if (Name.compare(allStages) == 0) {
+    } else if (Name.compare(AllStages) == 0) {
       OS << "  " << Name << " =  0x"
          << utohexstr(((1 << ShaderKindCount) - 1), false, 0)
          << ", // Pseudo-stage indicating op is supported in all stages\n";
-    } else if (Name.compare(allStages)) {
-      OS << "  " << Name << " = 1 << " << std::to_string(shiftVal++) << ",\n";
+    } else if (Name.compare(AllStages)) {
+      OS << "  " << Name << " = 1 << " << std::to_string(ShiftVal++) << ",\n";
     }
   }
   OS << "}; // enum ShaderKind\n\n";
@@ -504,7 +599,7 @@ static void emitDXILOperationTableDataStructs(const RecordKeeper &Records,
 /// Entry function call that invokes the functionality of this TableGen backend
 /// \param Records TableGen records of DXIL Operations defined in DXIL.td
 /// \param OS output stream
-static void EmitDXILOperation(const RecordKeeper &Records, raw_ostream &OS) {
+static void emitDxilOperation(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "// Generated code, do not edit.\n";
   OS << "\n";
   // Get all DXIL Ops property records
@@ -536,5 +631,5 @@ static void EmitDXILOperation(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "#endif\n\n";
 }
 
-static TableGen::Emitter::Opt X("gen-dxil-operation", EmitDXILOperation,
+static TableGen::Emitter::Opt X("gen-dxil-operation", emitDxilOperation,
                                 "Generate DXIL operation information");
diff --git a/llvm/utils/TableGen/DirectiveEmitter.cpp b/llvm/utils/TableGen/DirectiveEmitter.cpp
index 9dc29d8262fa2ca..fd815f4a31dad8d 100644
--- a/llvm/utils/TableGen/DirectiveEmitter.cpp
+++ b/llvm/utils/TableGen/DirectiveEmitter.cpp
@@ -46,7 +46,7 @@ class IfDefScope {
 
 // Generate enum class. Entries are emitted in the order in which they appear
 // in the `Records` vector.
-static void GenerateEnumClass(ArrayRef<const Record *> Records, raw_ostream &OS,
+static void generateEnumClass(ArrayRef<const Record *> Records, raw_ostream &OS,
                               StringRef Enum, StringRef Prefix,
                               const DirectiveLanguage &DirLang,
                               bool ExportEnums) {
@@ -79,7 +79,7 @@ static void GenerateEnumClass(ArrayRef<const Record *> Records, raw_ostream &OS,
 
 // Generate enums for values that clauses can take.
 // Also generate function declarations for get<Enum>Name(StringRef Str).
-static void GenerateEnumClauseVal(ArrayRef<const Record *> Records,
+static void generateEnumClauseVal(ArrayRef<const Record *> Records,
                                   raw_ostream &OS,
                                   const DirectiveLanguage &DirLang,
                                   std::string &EnumHelperFuncs) {
@@ -121,13 +121,13 @@ static void GenerateEnumClauseVal(ArrayRef<const Record *> Records,
   }
 }
 
-static bool HasDuplicateClauses(ArrayRef<const Record *> Clauses,
+static bool hasDuplicateClauses(ArrayRef<const Record *> Clauses,
                                 const Directive &Directive,
                                 StringSet<> &CrtClauses) {
   bool HasError = false;
   for (const VersionedClause VerClause : Clauses) {
-    const auto insRes = CrtClauses.insert(VerClause.getClause().getName());
-    if (!insRes.second) {
+    const auto InsRes = CrtClauses.insert(VerClause.getClause().getName());
+    if (!InsRes.second) {
       PrintError("Clause " + VerClause.getClause().getRecordName() +
                  " already defined on directive " + Directive.getRecordName());
       HasError = true;
@@ -140,20 +140,20 @@ static bool HasDuplicateClauses(ArrayRef<const Record *> Clauses,
 // three allowed list. Also, since required implies allowed, clauses cannot
 // appear in both the allowedClauses and requiredClauses lists.
 static bool
-HasDuplicateClausesInDirectives(ArrayRef<const Record *> Directives) {
+hasDuplicateClausesInDirectives(ArrayRef<const Record *> Directives) {
   bool HasDuplicate = false;
   for (const Directive Dir : Directives) {
     StringSet<> Clauses;
     // Check for duplicates in the three allowed lists.
-    if (HasDuplicateClauses(Dir.getAllowedClauses(), Dir, Clauses) ||
-        HasDuplicateClauses(Dir.getAllowedOnceClauses(), Dir, Clauses) ||
-        HasDuplicateClauses(Dir.getAllowedExclusiveClauses(), Dir, Clauses)) {
+    if (hasDuplicateClauses(Dir.getAllowedClauses(), Dir, Clauses) ||
+        hasDuplicateClauses(Dir.getAllowedOnceClauses(), Dir, Clauses) ||
+        hasDuplicateClauses(Dir.getAllowedExclusiveClauses(), Dir, Clauses)) {
       HasDuplicate = true;
     }
     // Check for duplicate between allowedClauses and required
     Clauses.clear();
-    if (HasDuplicateClauses(Dir.getAllowedClauses(), Dir, Clauses) ||
-        HasDuplicateClauses(Dir.getRequiredClauses(), Dir, Clauses)) {
+    if (hasDuplicateClauses(Dir.getAllowedClauses(), Dir, Clauses) ||
+        hasDuplicateClauses(Dir.getRequiredClauses(), Dir, Clauses)) {
       HasDuplicate = true;
     }
     if (HasDuplicate)
@@ -173,11 +173,11 @@ bool DirectiveLanguage::HasValidityErrors() const {
     return true;
   }
 
-  return HasDuplicateClausesInDirectives(getDirectives());
+  return hasDuplicateClausesInDirectives(getDirectives());
 }
 
 // Count the maximum number of leaf constituents per construct.
-static size_t GetMaxLeafCount(const DirectiveLanguage &DirLang) {
+static size_t getMaxLeafCount(const DirectiveLanguage &DirLang) {
   size_t MaxCount = 0;
   for (const Directive D : DirLang.getDirectives())
     MaxCount = std::max(MaxCount, D.getLeafConstructs().size());
@@ -186,7 +186,7 @@ static size_t GetMaxLeafCount(const DirectiveLanguage &DirLang) {
 
 // Generate the declaration section for the enumeration in the directive
 // language.
-static void EmitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
+static void emitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
   const auto DirLang = DirectiveLanguage(Records);
   if (DirLang.HasValidityErrors())
     return;
@@ -214,29 +214,29 @@ static void EmitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
     OS << "\nLLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();\n";
 
   // Emit Directive associations
-  std::vector<const Record *> associations;
-  copy_if(DirLang.getAssociations(), std::back_inserter(associations),
+  std::vector<const Record *> Associations;
+  copy_if(DirLang.getAssociations(), std::back_inserter(Associations),
           // Skip the "special" value
           [](const Record *Def) { return Def->getName() != "AS_FromLeaves"; });
-  GenerateEnumClass(associations, OS, "Association",
+  generateEnumClass(Associations, OS, "Association",
                     /*Prefix=*/"", DirLang, /*ExportEnums=*/false);
 
-  GenerateEnumClass(DirLang.getCategories(), OS, "Category", /*Prefix=*/"",
+  generateEnumClass(DirLang.getCategories(), OS, "Category", /*Prefix=*/"",
                     DirLang, /*ExportEnums=*/false);
 
   // Emit Directive enumeration
-  GenerateEnumClass(DirLang.getDirectives(), OS, "Directive",
+  generateEnumClass(DirLang.getDirectives(), OS, "Directive",
                     DirLang.getDirectivePrefix(), DirLang,
                     DirLang.hasMakeEnumAvailableInNamespace());
 
   // Emit Clause enumeration
-  GenerateEnumClass(DirLang.getClauses(), OS, "Clause",
+  generateEnumClass(DirLang.getClauses(), OS, "Clause",
                     DirLang.getClausePrefix(), DirLang,
                     DirLang.hasMakeEnumAvailableInNamespace());
 
   // Emit ClauseVal enumeration
   std::string EnumHelperFuncs;
-  GenerateEnumClauseVal(DirLang.getClauses(), OS, DirLang, EnumHelperFuncs);
+  generateEnumClauseVal(DirLang.getClauses(), OS, DirLang, EnumHelperFuncs);
 
   // Generic function signatures
   OS << "\n";
@@ -259,7 +259,7 @@ static void EmitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
      << "Clause C, unsigned Version);\n";
   OS << "\n";
   OS << "constexpr std::size_t getMaxLeafCount() { return "
-     << GetMaxLeafCount(DirLang) << "; }\n";
+     << getMaxLeafCount(DirLang) << "; }\n";
   OS << "LLVM_ABI Association getDirectiveAssociation(Directive D);\n";
   OS << "LLVM_ABI Category getDirectiveCategory(Directive D);\n";
   if (EnumHelperFuncs.length() > 0) {
@@ -277,7 +277,7 @@ static void EmitDirectivesDecl(const RecordKeeper &Records, raw_ostream &OS) {
 }
 
 // Generate function implementation for get<Enum>Name(StringRef Str)
-static void GenerateGetName(ArrayRef<const Record *> Records, raw_ostream &OS,
+static void generateGetName(ArrayRef<const Record *> Records, raw_ostream &OS,
                             StringRef Enum, const DirectiveLanguage &DirLang,
                             StringRef Prefix) {
   OS << "\n";
@@ -300,11 +300,11 @@ static void GenerateGetName(ArrayRef<const Record *> Records, raw_ostream &OS,
 }
 
 // Generate function implementation for get<Enum>Kind(StringRef Str)
-static void GenerateGetKind(ArrayRef<const Record *> Records, raw_ostream &OS,
+static void generateGetKind(ArrayRef<const Record *> Records, raw_ostream &OS,
                             StringRef Enum, const DirectiveLanguage &DirLang,
                             StringRef Prefix, bool ImplicitAsUnknown) {
 
-  auto DefaultIt = find_if(
+  const auto *DefaultIt = find_if(
       Records, [](const Record *R) { return R->getValueAsBit("isDefault"); });
 
   if (DefaultIt == Records.end()) {
@@ -334,7 +334,7 @@ static void GenerateGetKind(ArrayRef<const Record *> Records, raw_ostream &OS,
 }
 
 // Generate function implementation for get<ClauseVal>Kind(StringRef Str)
-static void GenerateGetKindClauseVal(const DirectiveLanguage &DirLang,
+static void generateGetKindClauseVal(const DirectiveLanguage &DirLang,
                                      raw_ostream &OS) {
   for (const Clause C : DirLang.getClauses()) {
     const auto &ClauseVals = C.getClauseVals();
@@ -389,7 +389,7 @@ static void GenerateGetKindClauseVal(const DirectiveLanguage &DirLang,
   }
 }
 
-static void GenerateCaseForVersionedClauses(ArrayRef<const Record *> Clauses,
+static void generateCaseForVersionedClauses(ArrayRef<const Record *> Clauses,
                                             raw_ostream &OS,
                                             StringRef DirectiveName,
                                             const DirectiveLanguage &DirLang,
@@ -406,7 +406,7 @@ static void GenerateCaseForVersionedClauses(ArrayRef<const Record *> Clauses,
   }
 }
 
-static std::string GetDirectiveName(const DirectiveLanguage &DirLang,
+static std::string getDirectiveName(const DirectiveLanguage &DirLang,
                                     const Record *Rec) {
   Directive Dir(Rec);
   return (Twine("llvm::") + DirLang.getCppNamespace() +
@@ -414,12 +414,12 @@ static std::string GetDirectiveName(const DirectiveLanguage &DirLang,
       .str();
 }
 
-static std::string GetDirectiveType(const DirectiveLanguage &DirLang) {
+static std::string getDirectiveType(const DirectiveLanguage &DirLang) {
   return (Twine("llvm::") + DirLang.getCppNamespace() + "::Directive").str();
 }
 
 // Generate the isAllowedClauseForDirective function implementation.
-static void GenerateIsAllowedClause(const DirectiveLanguage &DirLang,
+static void generateIsAllowedClause(const DirectiveLanguage &DirLang,
                                     raw_ostream &OS) {
   OS << "\n";
   OS << "bool llvm::" << DirLang.getCppNamespace()
@@ -445,16 +445,16 @@ static void GenerateIsAllowedClause(const DirectiveLanguage &DirLang,
 
       StringSet<> Cases;
 
-      GenerateCaseForVersionedClauses(Dir.getAllowedClauses(), OS,
+      generateCaseForVersionedClauses(Dir.getAllowedClauses(), OS,
                                       Dir.getName(), DirLang, Cases);
 
-      GenerateCaseForVersionedClauses(Dir.getAllowedOnceClauses(), OS,
+      generateCaseForVersionedClauses(Dir.getAllowedOnceClauses(), OS,
                                       Dir.getName(), DirLang, Cases);
 
-      GenerateCaseForVersionedClauses(Dir.getAllowedExclusiveClauses(), OS,
+      generateCaseForVersionedClauses(Dir.getAllowedExclusiveClauses(), OS,
                                       Dir.getName(), DirLang, Cases);
 
-      GenerateCaseForVersionedClauses(Dir.getRequiredClauses(), OS,
+      generateCaseForVersionedClauses(Dir.getRequiredClauses(), OS,
                                       Dir.getName(), DirLang, Cases);
 
       OS << "        default:\n";
@@ -470,7 +470,7 @@ static void GenerateIsAllowedClause(const DirectiveLanguage &DirLang,
   OS << "}\n"; // End of function isAllowedClauseForDirective
 }
 
-static void EmitLeafTable(const DirectiveLanguage &DirLang, raw_ostream &OS,
+static void emitLeafTable(const DirectiveLanguage &DirLang, raw_ostream &OS,
                           StringRef TableName) {
   // The leaf constructs are emitted in a form of a 2D table, where each
   // row corresponds to a directive (and there is a row for each directive).
@@ -498,7 +498,7 @@ static void EmitLeafTable(const DirectiveLanguage &DirLang, raw_ostream &OS,
     DirId.insert(std::make_pair(Rec, Idx));
 
   using LeafList = std::vector<int>;
-  int MaxLeafCount = GetMaxLeafCount(DirLang);
+  int MaxLeafCount = getMaxLeafCount(DirLang);
 
   // The initial leaf table, rows order is same as directive order.
   std::vector<LeafList> LeafTable(Directives.size());
@@ -560,19 +560,19 @@ static void EmitLeafTable(const DirectiveLanguage &DirLang, raw_ostream &OS,
   // type is `int` (by default). The code above uses `int` to store directive
   // ids, so make sure that we catch it when something changes in the
   // underlying type.
-  std::string DirectiveType = GetDirectiveType(DirLang);
+  std::string DirectiveType = getDirectiveType(DirLang);
   OS << "\nstatic_assert(sizeof(" << DirectiveType << ") == sizeof(int));\n";
 
   OS << "[[maybe_unused]] static const " << DirectiveType << ' ' << TableName
      << "[][" << MaxLeafCount + 2 << "] = {\n";
   for (size_t I = 0, E = Directives.size(); I != E; ++I) {
     auto &Leaves = LeafTable[Ordering[I]];
-    OS << "    {" << GetDirectiveName(DirLang, Directives[Leaves[0]]);
+    OS << "    {" << getDirectiveName(DirLang, Directives[Leaves[0]]);
     OS << ", static_cast<" << DirectiveType << ">(" << Leaves[1] << "),";
     for (size_t I = 2, E = Leaves.size(); I != E; ++I) {
       int Idx = Leaves[I];
       if (Idx >= 0)
-        OS << ' ' << GetDirectiveName(DirLang, Directives[Leaves[I]]) << ',';
+        OS << ' ' << getDirectiveName(DirLang, Directives[Leaves[I]]) << ',';
       else
         OS << " static_cast<" << DirectiveType << ">(-1),";
     }
@@ -600,7 +600,7 @@ static void EmitLeafTable(const DirectiveLanguage &DirLang, raw_ostream &OS,
   OS << "\n};\n";
 }
 
-static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang,
+static void generateGetDirectiveAssociation(const DirectiveLanguage &DirLang,
                                             raw_ostream &OS) {
   enum struct Association {
     None = 0, // None should be the smallest value.
@@ -613,10 +613,10 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang,
     Invalid,
   };
 
-  ArrayRef<const Record *> associations = DirLang.getAssociations();
+  ArrayRef<const Record *> Associations = DirLang.getAssociations();
 
-  auto getAssocValue = [](StringRef name) -> Association {
-    return StringSwitch<Association>(name)
+  auto GetAssocValue = [](StringRef Name) -> Association {
+    return StringSwitch<Association>(Name)
         .Case("AS_Block", Association::Block)
         .Case("AS_Declaration", Association::Declaration)
         .Case("AS_Delimited", Association::Delimited)
@@ -627,24 +627,24 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang,
         .Default(Association::Invalid);
   };
 
-  auto getAssocName = [&](Association A) -> StringRef {
+  auto GetAssocName = [&](Association A) -> StringRef {
     if (A != Association::Invalid && A != Association::FromLeaves) {
-      auto F = find_if(associations, [&](const Record *R) {
-        return getAssocValue(R->getName()) == A;
+      const auto *F = find_if(Associations, [&](const Record *R) {
+        return GetAssocValue(R->getName()) == A;
       });
-      if (F != associations.end())
+      if (F != Associations.end())
         return (*F)->getValueAsString("name"); // enum name
     }
     llvm_unreachable("Unexpected association value");
   };
 
-  auto errorPrefixFor = [&](Directive D) -> std::string {
+  auto ErrorPrefixFor = [&](Directive D) -> std::string {
     return (Twine("Directive '") + D.getName() + "' in namespace '" +
             DirLang.getCppNamespace() + "' ")
         .str();
   };
 
-  auto reduce = [&](Association A, Association B) -> Association {
+  auto Reduce = [&](Association A, Association B) -> Association {
     if (A > B)
       std::swap(A, B);
 
@@ -663,14 +663,14 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang,
 
   DenseMap<const Record *, Association> AsMap;
 
-  auto compAssocImpl = [&](const Record *R, auto &&Self) -> Association {
+  auto CompAssocImpl = [&](const Record *R, auto &&Self) -> Association {
     if (auto F = AsMap.find(R); F != AsMap.end())
       return F->second;
 
     Directive D(R);
-    Association AS = getAssocValue(D.getAssociation()->getName());
+    Association AS = GetAssocValue(D.getAssociation()->getName());
     if (AS == Association::Invalid) {
-      PrintFatalError(errorPrefixFor(D) +
+      PrintFatalError(ErrorPrefixFor(D) +
                       "has an unrecognized value for association: '" +
                       D.getAssociation()->getName() + "'");
     }
@@ -679,22 +679,22 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang,
       return AS;
     }
     // Compute the association from leaf constructs.
-    std::vector<const Record *> leaves = D.getLeafConstructs();
-    if (leaves.empty()) {
+    std::vector<const Record *> Leaves = D.getLeafConstructs();
+    if (Leaves.empty()) {
       errs() << D.getName() << '\n';
-      PrintFatalError(errorPrefixFor(D) +
+      PrintFatalError(ErrorPrefixFor(D) +
                       "requests association to be computed from leaves, "
                       "but it has no leaves");
     }
 
-    Association Result = Self(leaves[0], Self);
-    for (int I = 1, E = leaves.size(); I < E; ++I) {
-      Association A = Self(leaves[I], Self);
-      Association R = reduce(Result, A);
+    Association Result = Self(Leaves[0], Self);
+    for (int I = 1, E = Leaves.size(); I < E; ++I) {
+      Association A = Self(Leaves[I], Self);
+      Association R = Reduce(Result, A);
       if (R == Association::Invalid) {
-        PrintFatalError(errorPrefixFor(D) +
+        PrintFatalError(ErrorPrefixFor(D) +
                         "has leaves with incompatible association values: " +
-                        getAssocName(A) + " and " + getAssocName(R));
+                        GetAssocName(A) + " and " + GetAssocName(R));
       }
       Result = R;
     }
@@ -706,11 +706,11 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang,
   };
 
   for (const Record *R : DirLang.getDirectives())
-    compAssocImpl(R, compAssocImpl); // Updates AsMap.
+    CompAssocImpl(R, CompAssocImpl); // Updates AsMap.
 
   OS << '\n';
 
-  auto getQualifiedName = [&](StringRef Formatted) -> std::string {
+  auto GetQualifiedName = [&](StringRef Formatted) -> std::string {
     return (Twine("llvm::") + DirLang.getCppNamespace() +
             "::Directive::" + DirLang.getDirectivePrefix() + Formatted)
         .str();
@@ -727,9 +727,9 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang,
   for (const Record *R : DirLang.getDirectives()) {
     if (auto F = AsMap.find(R); F != AsMap.end()) {
       Directive Dir(R);
-      OS << "  case " << getQualifiedName(Dir.getFormattedName()) << ":\n";
+      OS << "  case " << GetQualifiedName(Dir.getFormattedName()) << ":\n";
       OS << "    return " << AssociationTypeName
-         << "::" << getAssocName(F->second) << ";\n";
+         << "::" << GetAssocName(F->second) << ";\n";
     }
   }
   OS << "  } // switch (Dir)\n";
@@ -737,7 +737,7 @@ static void GenerateGetDirectiveAssociation(const DirectiveLanguage &DirLang,
   OS << "}\n";
 }
 
-static void GenerateGetDirectiveCategory(const DirectiveLanguage &DirLang,
+static void generateGetDirectiveCategory(const DirectiveLanguage &DirLang,
                                          raw_ostream &OS) {
   std::string LangNamespace = "llvm::" + DirLang.getCppNamespace().str();
   std::string CategoryTypeName = LangNamespace + "::Category";
@@ -745,12 +745,12 @@ static void GenerateGetDirectiveCategory(const DirectiveLanguage &DirLang,
 
   OS << '\n';
   OS << CategoryTypeName << ' ' << LangNamespace << "::getDirectiveCategory("
-     << GetDirectiveType(DirLang) << " Dir) {\n";
+     << getDirectiveType(DirLang) << " Dir) {\n";
   OS << "  switch (Dir) {\n";
 
   for (const Record *R : DirLang.getDirectives()) {
     Directive D(R);
-    OS << "  case " << GetDirectiveName(DirLang, R) << ":\n";
+    OS << "  case " << getDirectiveName(DirLang, R) << ":\n";
     OS << "    return " << CategoryNamespace
        << D.getCategory()->getValueAsString("name") << ";\n";
   }
@@ -760,7 +760,7 @@ static void GenerateGetDirectiveCategory(const DirectiveLanguage &DirLang,
 }
 
 // Generate a simple enum set with the give clauses.
-static void GenerateClauseSet(ArrayRef<const Record *> Clauses, raw_ostream &OS,
+static void generateClauseSet(ArrayRef<const Record *> Clauses, raw_ostream &OS,
                               StringRef ClauseSetPrefix, const Directive &Dir,
                               const DirectiveLanguage &DirLang) {
 
@@ -778,7 +778,7 @@ static void GenerateClauseSet(ArrayRef<const Record *> Clauses, raw_ostream &OS,
 }
 
 // Generate an enum set for the 4 kinds of clauses linked to a directive.
-static void GenerateDirectiveClauseSets(const DirectiveLanguage &DirLang,
+static void generateDirectiveClauseSets(const DirectiveLanguage &DirLang,
                                         raw_ostream &OS) {
 
   IfDefScope Scope("GEN_FLANG_DIRECTIVE_CLAUSE_SETS", OS);
@@ -796,13 +796,13 @@ static void GenerateDirectiveClauseSets(const DirectiveLanguage &DirLang,
     OS << "\n";
     OS << "  // Sets for " << Dir.getName() << "\n";
 
-    GenerateClauseSet(Dir.getAllowedClauses(), OS, "allowedClauses_", Dir,
+    generateClauseSet(Dir.getAllowedClauses(), OS, "allowedClauses_", Dir,
                       DirLang);
-    GenerateClauseSet(Dir.getAllowedOnceClauses(), OS, "allowedOnceClauses_",
+    generateClauseSet(Dir.getAllowedOnceClauses(), OS, "allowedOnceClauses_",
                       Dir, DirLang);
-    GenerateClauseSet(Dir.getAllowedExclusiveClauses(), OS,
+    generateClauseSet(Dir.getAllowedExclusiveClauses(), OS,
                       "allowedExclusiveClauses_", Dir, DirLang);
-    GenerateClauseSet(Dir.getRequiredClauses(), OS, "requiredClauses_", Dir,
+    generateClauseSet(Dir.getRequiredClauses(), OS, "requiredClauses_", Dir,
                       DirLang);
   }
 
@@ -816,7 +816,7 @@ static void GenerateDirectiveClauseSets(const DirectiveLanguage &DirLang,
 // Generate a map of directive (key) with DirectiveClauses struct as values.
 // The struct holds the 4 sets of enumeration for the 4 kinds of clauses
 // allowances (allowed, allowed once, allowed exclusive and required).
-static void GenerateDirectiveClauseMap(const DirectiveLanguage &DirLang,
+static void generateDirectiveClauseMap(const DirectiveLanguage &DirLang,
                                        raw_ostream &OS) {
 
   IfDefScope Scope("GEN_FLANG_DIRECTIVE_CLAUSE_MAP", OS);
@@ -850,7 +850,7 @@ static void GenerateDirectiveClauseMap(const DirectiveLanguage &DirLang,
 // If the clause does not hold a value, an EMPTY_CLASS is used.
 // If the clause class is generic then a WRAPPER_CLASS is used. When the value
 // is optional, the value class is wrapped into a std::optional.
-static void GenerateFlangClauseParserClass(const DirectiveLanguage &DirLang,
+static void generateFlangClauseParserClass(const DirectiveLanguage &DirLang,
                                            raw_ostream &OS) {
 
   IfDefScope Scope("GEN_FLANG_CLAUSE_PARSER_CLASSES", OS);
@@ -877,7 +877,7 @@ static void GenerateFlangClauseParserClass(const DirectiveLanguage &DirLang,
 }
 
 // Generate a list of the different clause classes for Flang.
-static void GenerateFlangClauseParserClassList(const DirectiveLanguage &DirLang,
+static void generateFlangClauseParserClassList(const DirectiveLanguage &DirLang,
                                                raw_ostream &OS) {
 
   IfDefScope Scope("GEN_FLANG_CLAUSE_PARSER_CLASSES_LIST", OS);
@@ -890,7 +890,7 @@ static void GenerateFlangClauseParserClassList(const DirectiveLanguage &DirLang,
 }
 
 // Generate dump node list for the clauses holding a generic class name.
-static void GenerateFlangClauseDump(const DirectiveLanguage &DirLang,
+static void generateFlangClauseDump(const DirectiveLanguage &DirLang,
                                     raw_ostream &OS) {
 
   IfDefScope Scope("GEN_FLANG_DUMP_PARSE_TREE_CLAUSES", OS);
@@ -904,7 +904,7 @@ static void GenerateFlangClauseDump(const DirectiveLanguage &DirLang,
 
 // Generate Unparse functions for clauses classes in the Flang parse-tree
 // If the clause is a non-generic class, no entry is generated.
-static void GenerateFlangClauseUnparse(const DirectiveLanguage &DirLang,
+static void generateFlangClauseUnparse(const DirectiveLanguage &DirLang,
                                        raw_ostream &OS) {
 
   IfDefScope Scope("GEN_FLANG_CLAUSE_UNPARSE", OS);
@@ -955,7 +955,7 @@ static void GenerateFlangClauseUnparse(const DirectiveLanguage &DirLang,
 }
 
 // Generate check in the Enter functions for clauses classes.
-static void GenerateFlangClauseCheckPrototypes(const DirectiveLanguage &DirLang,
+static void generateFlangClauseCheckPrototypes(const DirectiveLanguage &DirLang,
                                                raw_ostream &OS) {
 
   IfDefScope Scope("GEN_FLANG_CLAUSE_CHECK_ENTER", OS);
@@ -969,7 +969,7 @@ static void GenerateFlangClauseCheckPrototypes(const DirectiveLanguage &DirLang,
 
 // Generate the mapping for clauses between the parser class and the
 // corresponding clause Kind
-static void GenerateFlangClauseParserKindMap(const DirectiveLanguage &DirLang,
+static void generateFlangClauseParserKindMap(const DirectiveLanguage &DirLang,
                                              raw_ostream &OS) {
 
   IfDefScope Scope("GEN_FLANG_CLAUSE_PARSER_KIND_MAP", OS);
@@ -996,7 +996,7 @@ static bool compareClauseName(const Record *R1, const Record *R2) {
 }
 
 // Generate the parser for the clauses.
-static void GenerateFlangClausesParser(const DirectiveLanguage &DirLang,
+static void generateFlangClausesParser(const DirectiveLanguage &DirLang,
                                        raw_ostream &OS) {
   std::vector<const Record *> Clauses = DirLang.getClauses();
   // Sort clauses in reverse alphabetical order so with clauses with same
@@ -1004,8 +1004,8 @@ static void GenerateFlangClausesParser(const DirectiveLanguage &DirLang,
   sort(Clauses, compareClauseName);
   IfDefScope Scope("GEN_FLANG_CLAUSES_PARSER", OS);
   OS << "\n";
-  unsigned index = 0;
-  unsigned lastClauseIndex = Clauses.size() - 1;
+  unsigned Index = 0;
+  unsigned LastClauseIndex = Clauses.size() - 1;
   OS << "TYPE_PARSER(\n";
   for (const Clause Clause : Clauses) {
     if (Clause.getAliases().empty()) {
@@ -1013,8 +1013,8 @@ static void GenerateFlangClausesParser(const DirectiveLanguage &DirLang,
     } else {
       OS << "  ("
          << "\"" << Clause.getName() << "\"_tok";
-      for (StringRef alias : Clause.getAliases()) {
-        OS << " || \"" << alias << "\"_tok";
+      for (StringRef Alias : Clause.getAliases()) {
+        OS << " || \"" << Alias << "\"_tok";
       }
       OS << ")";
     }
@@ -1024,10 +1024,10 @@ static void GenerateFlangClausesParser(const DirectiveLanguage &DirLang,
        << "::" << Clause.getFormattedParserClassName() << ">(";
     if (Clause.getFlangClass().empty()) {
       OS << "))";
-      if (index != lastClauseIndex)
+      if (Index != LastClauseIndex)
         OS << " ||";
       OS << "\n";
-      ++index;
+      ++Index;
       continue;
     }
 
@@ -1064,38 +1064,38 @@ static void GenerateFlangClausesParser(const DirectiveLanguage &DirLang,
     if (Clause.isValueOptional()) // close maybe(.
       OS << ")";
     OS << "))";
-    if (index != lastClauseIndex)
+    if (Index != LastClauseIndex)
       OS << " ||";
     OS << "\n";
-    ++index;
+    ++Index;
   }
   OS << ")\n";
 }
 
 // Generate the implementation section for the enumeration in the directive
 // language
-static void EmitDirectivesFlangImpl(const DirectiveLanguage &DirLang,
+static void emitDirectivesFlangImpl(const DirectiveLanguage &DirLang,
                                     raw_ostream &OS) {
-  GenerateDirectiveClauseSets(DirLang, OS);
+  generateDirectiveClauseSets(DirLang, OS);
 
-  GenerateDirectiveClauseMap(DirLang, OS);
+  generateDirectiveClauseMap(DirLang, OS);
 
-  GenerateFlangClauseParserClass(DirLang, OS);
+  generateFlangClauseParserClass(DirLang, OS);
 
-  GenerateFlangClauseParserClassList(DirLang, OS);
+  generateFlangClauseParserClassList(DirLang, OS);
 
-  GenerateFlangClauseDump(DirLang, OS);
+  generateFlangClauseDump(DirLang, OS);
 
-  GenerateFlangClauseUnparse(DirLang, OS);
+  generateFlangClauseUnparse(DirLang, OS);
 
-  GenerateFlangClauseCheckPrototypes(DirLang, OS);
+  generateFlangClauseCheckPrototypes(DirLang, OS);
 
-  GenerateFlangClauseParserKindMap(DirLang, OS);
+  generateFlangClauseParserKindMap(DirLang, OS);
 
-  GenerateFlangClausesParser(DirLang, OS);
+  generateFlangClausesParser(DirLang, OS);
 }
 
-static void GenerateClauseClassMacro(const DirectiveLanguage &DirLang,
+static void generateClauseClassMacro(const DirectiveLanguage &DirLang,
                                      raw_ostream &OS) {
   // Generate macros style information for legacy code in clang
   IfDefScope Scope("GEN_CLANG_CLAUSE_CLASS", OS);
@@ -1163,63 +1163,63 @@ static void GenerateClauseClassMacro(const DirectiveLanguage &DirLang,
 
 // Generate the implemenation for the enumeration in the directive
 // language. This code can be included in library.
-void EmitDirectivesBasicImpl(const DirectiveLanguage &DirLang,
+void emitDirectivesBasicImpl(const DirectiveLanguage &DirLang,
                              raw_ostream &OS) {
   IfDefScope Scope("GEN_DIRECTIVES_IMPL", OS);
 
   OS << "\n#include \"llvm/Support/ErrorHandling.h\"\n";
 
   // getDirectiveKind(StringRef Str)
-  GenerateGetKind(DirLang.getDirectives(), OS, "Directive", DirLang,
+  generateGetKind(DirLang.getDirectives(), OS, "Directive", DirLang,
                   DirLang.getDirectivePrefix(), /*ImplicitAsUnknown=*/false);
 
   // getDirectiveName(Directive Kind)
-  GenerateGetName(DirLang.getDirectives(), OS, "Directive", DirLang,
+  generateGetName(DirLang.getDirectives(), OS, "Directive", DirLang,
                   DirLang.getDirectivePrefix());
 
   // getClauseKind(StringRef Str)
-  GenerateGetKind(DirLang.getClauses(), OS, "Clause", DirLang,
+  generateGetKind(DirLang.getClauses(), OS, "Clause", DirLang,
                   DirLang.getClausePrefix(),
                   /*ImplicitAsUnknown=*/true);
 
   // getClauseName(Clause Kind)
-  GenerateGetName(DirLang.getClauses(), OS, "Clause", DirLang,
+  generateGetName(DirLang.getClauses(), OS, "Clause", DirLang,
                   DirLang.getClausePrefix());
 
   // get<ClauseVal>Kind(StringRef Str)
-  GenerateGetKindClauseVal(DirLang, OS);
+  generateGetKindClauseVal(DirLang, OS);
 
   // isAllowedClauseForDirective(Directive D, Clause C, unsigned Version)
-  GenerateIsAllowedClause(DirLang, OS);
+  generateIsAllowedClause(DirLang, OS);
 
   // getDirectiveAssociation(Directive D)
-  GenerateGetDirectiveAssociation(DirLang, OS);
+  generateGetDirectiveAssociation(DirLang, OS);
 
   // getDirectiveCategory(Directive D)
-  GenerateGetDirectiveCategory(DirLang, OS);
+  generateGetDirectiveCategory(DirLang, OS);
 
   // Leaf table for getLeafConstructs, etc.
-  EmitLeafTable(DirLang, OS, "LeafConstructTable");
+  emitLeafTable(DirLang, OS, "LeafConstructTable");
 }
 
 // Generate the implemenation section for the enumeration in the directive
 // language.
-static void EmitDirectivesImpl(const RecordKeeper &Records, raw_ostream &OS) {
+static void emitDirectivesImpl(const RecordKeeper &Records, raw_ostream &OS) {
   const auto DirLang = DirectiveLanguage(Records);
   if (DirLang.HasValidityErrors())
     return;
 
-  EmitDirectivesFlangImpl(DirLang, OS);
+  emitDirectivesFlangImpl(DirLang, OS);
 
-  GenerateClauseClassMacro(DirLang, OS);
+  generateClauseClassMacro(DirLang, OS);
 
-  EmitDirectivesBasicImpl(DirLang, OS);
+  emitDirectivesBasicImpl(DirLang, OS);
 }
 
 static TableGen::Emitter::Opt
-    X("gen-directive-decl", EmitDirectivesDecl,
+    X("gen-directive-decl", emitDirectivesDecl,
       "Generate directive related declaration code (header file)");
 
 static TableGen::Emitter::Opt
-    Y("gen-directive-impl", EmitDirectivesImpl,
+    Y("gen-directive-impl", emitDirectivesImpl,
       "Generate directive related implementation code");
diff --git a/llvm/utils/TableGen/DisassemblerEmitter.cpp b/llvm/utils/TableGen/DisassemblerEmitter.cpp
index eb15392272a3f31..70d835e699ffeff 100644
--- a/llvm/utils/TableGen/DisassemblerEmitter.cpp
+++ b/llvm/utils/TableGen/DisassemblerEmitter.cpp
@@ -95,7 +95,7 @@ using namespace llvm::X86Disassembler;
 /// X86RecognizableInstr.cpp contains the implementation for a single
 ///   instruction.
 
-static void EmitDisassembler(const RecordKeeper &Records, raw_ostream &OS) {
+static void emitDisassembler(const RecordKeeper &Records, raw_ostream &OS) {
   const CodeGenTarget Target(Records);
   emitSourceFileHeader(" * " + Target.getName().str() + " Disassembler", OS);
 
@@ -132,5 +132,5 @@ static void EmitDisassembler(const RecordKeeper &Records, raw_ostream &OS) {
 
 cl::OptionCategory DisassemblerEmitterCat("Options for -gen-disassembler");
 
-static TableGen::Emitter::Opt X("gen-disassembler", EmitDisassembler,
+static TableGen::Emitter::Opt X("gen-disassembler", emitDisassembler,
                                 "Generate disassembler");
diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp
index 17198c85f060091..2052222cae5e5f2 100644
--- a/llvm/utils/TableGen/FastISelEmitter.cpp
+++ b/llvm/utils/TableGen/FastISelEmitter.cpp
@@ -718,19 +718,20 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) {
             const PredMap &PM = RI.second;
 
             OS << "unsigned fastEmit_" << getLegalCName(Opcode) << "_"
-               << getLegalCName(std::string(getName(VT))) << "_"
-               << getLegalCName(std::string(getName(RetVT))) << "_";
+               << getLegalCName(std::string(getEnumName(VT))) << "_"
+               << getLegalCName(std::string(getEnumName(RetVT))) << "_";
             Operands.PrintManglingSuffix(OS, ImmediatePredicates);
             OS << "(";
             Operands.PrintParameters(OS);
             OS << ") {\n";
 
-            emitInstructionCode(OS, Operands, PM, std::string(getName(RetVT)));
+            emitInstructionCode(OS, Operands, PM,
+                                std::string(getEnumName(RetVT)));
           }
 
           // Emit one function for the type that demultiplexes on return type.
           OS << "unsigned fastEmit_" << getLegalCName(Opcode) << "_"
-             << getLegalCName(std::string(getName(VT))) << "_";
+             << getLegalCName(std::string(getEnumName(VT))) << "_";
           Operands.PrintManglingSuffix(OS, ImmediatePredicates);
           OS << "(MVT RetVT";
           if (!Operands.empty())
@@ -739,10 +740,10 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) {
           OS << ") {\nswitch (RetVT.SimpleTy) {\n";
           for (const auto &RI : RM) {
             MVT::SimpleValueType RetVT = RI.first;
-            OS << "  case " << getName(RetVT) << ": return fastEmit_"
+            OS << "  case " << getEnumName(RetVT) << ": return fastEmit_"
                << getLegalCName(Opcode) << "_"
-               << getLegalCName(std::string(getName(VT))) << "_"
-               << getLegalCName(std::string(getName(RetVT))) << "_";
+               << getLegalCName(std::string(getEnumName(VT))) << "_"
+               << getLegalCName(std::string(getEnumName(RetVT))) << "_";
             Operands.PrintManglingSuffix(OS, ImmediatePredicates);
             OS << "(";
             Operands.PrintArguments(OS);
@@ -753,7 +754,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) {
         } else {
           // Non-variadic return type.
           OS << "unsigned fastEmit_" << getLegalCName(Opcode) << "_"
-             << getLegalCName(std::string(getName(VT))) << "_";
+             << getLegalCName(std::string(getEnumName(VT))) << "_";
           Operands.PrintManglingSuffix(OS, ImmediatePredicates);
           OS << "(MVT RetVT";
           if (!Operands.empty())
@@ -761,7 +762,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) {
           Operands.PrintParameters(OS);
           OS << ") {\n";
 
-          OS << "  if (RetVT.SimpleTy != " << getName(RM.begin()->first)
+          OS << "  if (RetVT.SimpleTy != " << getEnumName(RM.begin()->first)
              << ")\n    return 0;\n";
 
           const PredMap &PM = RM.begin()->second;
@@ -781,7 +782,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) {
       OS << "  switch (VT.SimpleTy) {\n";
       for (const auto &TI : TM) {
         MVT::SimpleValueType VT = TI.first;
-        std::string TypeName = std::string(getName(VT));
+        std::string TypeName = std::string(getEnumName(VT));
         OS << "  case " << TypeName << ": return fastEmit_"
            << getLegalCName(Opcode) << "_" << getLegalCName(TypeName) << "_";
         Operands.PrintManglingSuffix(OS, ImmediatePredicates);
diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
index 424f1ccb067f905..149ba7a1d9032d3 100644
--- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
@@ -40,9 +40,7 @@
 #include "Common/SubtargetFeatureInfo.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/EquivalenceClasses.h"
-#include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSet.h"
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index e866bd983e04ea6..859310906af4686 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -38,7 +38,6 @@
 #include "Common/GlobalISel/GlobalISelMatchTable.h"
 #include "Common/GlobalISel/GlobalISelMatchTableExecutorEmitter.h"
 #include "Common/InfoByHwMode.h"
-#include "Common/SubtargetFeatureInfo.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGenTypes/LowLevelType.h"
 #include "llvm/CodeGenTypes/MachineValueType.h"
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 1968e7eac21e337..070d7522a97be9f 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -29,7 +29,6 @@
 #include <array>
 #include <cassert>
 #include <cctype>
-#include <limits>
 #include <map>
 #include <optional>
 #include <string>
diff --git a/llvm/utils/TableGen/OptionParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp
index 2872762cc7fd96e..86e8378ad5ac5d6 100644
--- a/llvm/utils/TableGen/OptionParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptionParserEmitter.cpp
@@ -15,7 +15,6 @@
 #include "llvm/TableGen/TableGenBackend.h"
 #include <cstring>
 #include <map>
-#include <memory>
 
 using namespace llvm;
 
@@ -27,7 +26,7 @@ static std::string getOptionName(const Record &R) {
   return std::string(R.getValueAsString("EnumName"));
 }
 
-static raw_ostream &write_cstring(raw_ostream &OS, llvm::StringRef Str) {
+static raw_ostream &writeCstring(raw_ostream &OS, llvm::StringRef Str) {
   OS << '"';
   OS.write_escaped(Str);
   OS << '"';
@@ -118,7 +117,7 @@ struct SimpleEnumValueTable {
     OS << "static const SimpleEnumValue " << ValueTableName << "[] = {\n";
     for (unsigned I = 0, E = Values.size(); I != E; ++I) {
       OS << "{";
-      write_cstring(OS, Values[I]);
+      writeCstring(OS, Values[I]);
       OS << ",";
       OS << "static_cast<unsigned>(";
       emitScopedNormalizedValue(OS, NormalizedValues[I]);
@@ -191,7 +190,7 @@ static MarshallingInfo createMarshallingInfo(const Record &R) {
   return Ret;
 }
 
-static void EmitHelpTextsForVariants(
+static void emitHelpTextsForVariants(
     raw_ostream &OS, std::vector<std::pair<std::vector<std::string>, StringRef>>
                          HelpTextsForVariants) {
   // OptTable must be constexpr so it uses std::arrays with these capacities.
@@ -236,7 +235,7 @@ static void EmitHelpTextsForVariants(
     OS << "}}, ";
 
     if (Help.size())
-      write_cstring(OS, Help);
+      writeCstring(OS, Help);
     else
       OS << "nullptr";
     OS << ")";
@@ -250,7 +249,7 @@ static void EmitHelpTextsForVariants(
 /// OptionParserEmitter - This tablegen backend takes an input .td file
 /// describing a list of options and emits a data structure for parsing and
 /// working with those options when given an input command line.
-static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
+static void emitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
   // Get the option groups and options.
   ArrayRef<const Record *> Groups =
       Records.getAllDerivedDefinitions("OptionGroup");
@@ -364,12 +363,12 @@ static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
     if (!isa<UnsetInit>(R.getValueInit("HelpText"))) {
       OS << ",\n";
       OS << "       ";
-      write_cstring(OS, R.getValueAsString("HelpText"));
+      writeCstring(OS, R.getValueAsString("HelpText"));
     } else
       OS << ", nullptr";
 
     // Not using Visibility specific text for group help.
-    EmitHelpTextsForVariants(OS, {});
+    emitHelpTextsForVariants(OS, {});
 
     // The option meta-variable name (unused).
     OS << ", nullptr";
@@ -388,7 +387,7 @@ static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
     OS << Prefixes[PrefixKeyT(RPrefixes.begin(), RPrefixes.end())] << ", ";
 
     // The option prefixed name.
-    write_cstring(OS, getOptionPrefixedName(R));
+    writeCstring(OS, getOptionPrefixedName(R));
 
     // The option identifier name.
     OS << ", " << getOptionName(R);
@@ -465,7 +464,7 @@ static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
     if (!isa<UnsetInit>(R.getValueInit("HelpText"))) {
       OS << ",\n";
       OS << "       ";
-      write_cstring(OS, R.getValueAsString("HelpText"));
+      writeCstring(OS, R.getValueAsString("HelpText"));
     } else
       OS << ", nullptr";
 
@@ -483,19 +482,19 @@ static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
       HelpTextsForVariants.push_back(std::make_pair(
           VisibilityNames, VisibilityHelp->getValueAsString("Text")));
     }
-    EmitHelpTextsForVariants(OS, HelpTextsForVariants);
+    emitHelpTextsForVariants(OS, HelpTextsForVariants);
 
     // The option meta-variable name.
     OS << ", ";
     if (!isa<UnsetInit>(R.getValueInit("MetaVarName")))
-      write_cstring(OS, R.getValueAsString("MetaVarName"));
+      writeCstring(OS, R.getValueAsString("MetaVarName"));
     else
       OS << "nullptr";
 
     // The option Values. Used for shell autocompletion.
     OS << ", ";
     if (!isa<UnsetInit>(R.getValueInit("Values")))
-      write_cstring(OS, R.getValueAsString("Values"));
+      writeCstring(OS, R.getValueAsString("Values"));
     else if (!isa<UnsetInit>(R.getValueInit("ValuesCode"))) {
       OS << getOptionName(R) << "_Values";
     } else
@@ -572,5 +571,5 @@ static void EmitOptionParser(const RecordKeeper &Records, raw_ostream &OS) {
   OS << "\n";
 }
 
-static TableGen::Emitter::Opt X("gen-opt-parser-defs", EmitOptionParser,
+static TableGen::Emitter::Opt X("gen-opt-parser-defs", emitOptionParser,
                                 "Generate option definitions");
diff --git a/llvm/utils/TableGen/OptionRSTEmitter.cpp b/llvm/utils/TableGen/OptionRSTEmitter.cpp
index 1b4c4cad4f0a451..6eac10e1831f022 100644
--- a/llvm/utils/TableGen/OptionRSTEmitter.cpp
+++ b/llvm/utils/TableGen/OptionRSTEmitter.cpp
@@ -16,7 +16,7 @@ using namespace llvm;
 
 /// This tablegen backend takes an input .td file describing a list of options
 /// and emits a RST man page.
-static void EmitOptionRST(const RecordKeeper &Records, raw_ostream &OS) {
+static void emitOptionRst(const RecordKeeper &Records, raw_ostream &OS) {
   llvm::StringMap<std::vector<const Record *>> OptionsByGroup;
 
   // Get the options.
@@ -96,5 +96,5 @@ static void EmitOptionRST(const RecordKeeper &Records, raw_ostream &OS) {
   }
 }
 
-static TableGen::Emitter::Opt X("gen-opt-rst", EmitOptionRST,
+static TableGen::Emitter::Opt X("gen-opt-rst", emitOptionRst,
                                 "Generate option RST");
diff --git a/llvm/utils/TableGen/PseudoLoweringEmitter.cpp b/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
index d2d2bd91445a141..bcbc6ea20751fe7 100644
--- a/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -17,7 +17,6 @@
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TGTimer.h"
 #include "llvm/TableGen/TableGenBackend.h"
-#include <vector>
 using namespace llvm;
 
 #define DEBUG_TYPE "pseudo-lowering"
diff --git a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp
index 23496a37d5ea1c3..39211aab6f2d1ec 100644
--- a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp
+++ b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp
@@ -244,13 +244,13 @@ static void emitRISCVExtensionBitmask(const RecordKeeper &RK, raw_ostream &OS) {
   OS << "#endif\n";
 }
 
-static void EmitRISCVTargetDef(const RecordKeeper &RK, raw_ostream &OS) {
+static void emitRiscvTargetDef(const RecordKeeper &RK, raw_ostream &OS) {
   emitRISCVExtensions(RK, OS);
   emitRISCVProfiles(RK, OS);
   emitRISCVProcs(RK, OS);
   emitRISCVExtensionBitmask(RK, OS);
 }
 
-static TableGen::Emitter::Opt X("gen-riscv-target-def", EmitRISCVTargetDef,
+static TableGen::Emitter::Opt X("gen-riscv-target-def", emitRiscvTargetDef,
                                 "Generate the list of CPUs and extensions for "
                                 "RISC-V");
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 17b84d06fe85738..02c799cb6f14710 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -87,65 +87,65 @@ class SubtargetEmitter {
   CodeGenSchedModels &SchedModels;
   std::string Target;
 
-  FeatureMapTy Enumeration(raw_ostream &OS);
-  void EmitSubtargetInfoMacroCalls(raw_ostream &OS);
-  unsigned FeatureKeyValues(raw_ostream &OS, const FeatureMapTy &FeatureMap);
-  unsigned CPUKeyValues(raw_ostream &OS, const FeatureMapTy &FeatureMap);
-  void FormItineraryStageString(const std::string &Names,
+  FeatureMapTy enumeration(raw_ostream &OS);
+  void emitSubtargetInfoMacroCalls(raw_ostream &OS);
+  unsigned featureKeyValues(raw_ostream &OS, const FeatureMapTy &FeatureMap);
+  unsigned cpuKeyValues(raw_ostream &OS, const FeatureMapTy &FeatureMap);
+  void formItineraryStageString(const std::string &Names,
                                 const Record *ItinData, std::string &ItinString,
                                 unsigned &NStages);
-  void FormItineraryOperandCycleString(const Record *ItinData,
+  void formItineraryOperandCycleString(const Record *ItinData,
                                        std::string &ItinString,
                                        unsigned &NOperandCycles);
-  void FormItineraryBypassString(const std::string &Names,
+  void formItineraryBypassString(const std::string &Names,
                                  const Record *ItinData,
                                  std::string &ItinString,
                                  unsigned NOperandCycles);
-  void EmitStageAndOperandCycleData(
+  void emitStageAndOperandCycleData(
       raw_ostream &OS, std::vector<std::vector<InstrItinerary>> &ProcItinLists);
-  void EmitItineraries(raw_ostream &OS,
+  void emitItineraries(raw_ostream &OS,
                        std::vector<std::vector<InstrItinerary>> &ProcItinLists);
-  unsigned EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
+  unsigned emitRegisterFileTables(const CodeGenProcModel &ProcModel,
                                   raw_ostream &OS);
-  void EmitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel,
+  void emitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel,
                               raw_ostream &OS);
-  void EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
+  void emitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
                               raw_ostream &OS);
-  void EmitProcessorProp(raw_ostream &OS, const Record *R, StringRef Name,
+  void emitProcessorProp(raw_ostream &OS, const Record *R, StringRef Name,
                          char Separator);
-  void EmitProcessorResourceSubUnits(const CodeGenProcModel &ProcModel,
+  void emitProcessorResourceSubUnits(const CodeGenProcModel &ProcModel,
                                      raw_ostream &OS);
-  void EmitProcessorResources(const CodeGenProcModel &ProcModel,
+  void emitProcessorResources(const CodeGenProcModel &ProcModel,
                               raw_ostream &OS);
-  const Record *FindWriteResources(const CodeGenSchedRW &SchedWrite,
+  const Record *findWriteResources(const CodeGenSchedRW &SchedWrite,
                                    const CodeGenProcModel &ProcModel);
-  const Record *FindReadAdvance(const CodeGenSchedRW &SchedRead,
+  const Record *findReadAdvance(const CodeGenSchedRW &SchedRead,
                                 const CodeGenProcModel &ProcModel);
-  void ExpandProcResources(ConstRecVec &PRVec,
+  void expandProcResources(ConstRecVec &PRVec,
                            std::vector<int64_t> &ReleaseAtCycles,
                            std::vector<int64_t> &AcquireAtCycles,
                            const CodeGenProcModel &ProcModel);
-  void GenSchedClassTables(const CodeGenProcModel &ProcModel,
+  void genSchedClassTables(const CodeGenProcModel &ProcModel,
                            SchedClassTables &SchedTables);
-  void EmitSchedClassTables(SchedClassTables &SchedTables, raw_ostream &OS);
-  void EmitProcessorModels(raw_ostream &OS);
-  void EmitSchedModelHelpers(const std::string &ClassName, raw_ostream &OS);
+  void emitSchedClassTables(SchedClassTables &SchedTables, raw_ostream &OS);
+  void emitProcessorModels(raw_ostream &OS);
+  void emitSchedModelHelpers(const std::string &ClassName, raw_ostream &OS);
   void emitSchedModelHelpersImpl(raw_ostream &OS,
                                  bool OnlyExpandMCInstPredicates = false);
   void emitGenMCSubtargetInfo(raw_ostream &OS);
-  void EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS);
+  void emitMcInstrAnalysisPredicateFunctions(raw_ostream &OS);
 
-  void EmitSchedModel(raw_ostream &OS);
+  void emitSchedModel(raw_ostream &OS);
   void emitGetMacroFusions(const std::string &ClassName, raw_ostream &OS);
-  void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS);
-  void ParseFeaturesFunction(raw_ostream &OS);
+  void emitHwModeCheck(const std::string &ClassName, raw_ostream &OS);
+  void parseFeaturesFunction(raw_ostream &OS);
 
 public:
   SubtargetEmitter(const RecordKeeper &R)
       : TGT(R), Records(R), SchedModels(TGT.getSchedModels()),
         Target(TGT.getName()) {}
 
-  void run(raw_ostream &o);
+  void run(raw_ostream &O);
 };
 
 } // end anonymous namespace
@@ -153,7 +153,7 @@ class SubtargetEmitter {
 //
 // Enumeration - Emit the specified class as an enumeration.
 //
-FeatureMapTy SubtargetEmitter::Enumeration(raw_ostream &OS) {
+FeatureMapTy SubtargetEmitter::enumeration(raw_ostream &OS) {
   ArrayRef<const Record *> DefList =
       Records.getAllDerivedDefinitions("SubtargetFeature");
 
@@ -171,15 +171,15 @@ FeatureMapTy SubtargetEmitter::Enumeration(raw_ostream &OS) {
 
   FeatureMapTy FeatureMap;
   // For each record
-  for (unsigned i = 0; i < N; ++i) {
+  for (unsigned I = 0; I < N; ++I) {
     // Next record
-    const Record *Def = DefList[i];
+    const Record *Def = DefList[I];
 
     // Get and emit name
-    OS << "  " << Def->getName() << " = " << i << ",\n";
+    OS << "  " << Def->getName() << " = " << I << ",\n";
 
     // Save the index for this feature.
-    FeatureMap[Def] = i;
+    FeatureMap[Def] = I;
   }
 
   OS << "  "
@@ -201,9 +201,9 @@ static void printFeatureMask(raw_ostream &OS,
   }
 
   OS << "{ { { ";
-  for (unsigned i = 0; i != Mask.size(); ++i) {
+  for (unsigned I = 0; I != Mask.size(); ++I) {
     OS << "0x";
-    OS.write_hex(Mask[i]);
+    OS.write_hex(Mask[I]);
     OS << "ULL, ";
   }
   OS << "} } }";
@@ -211,7 +211,7 @@ static void printFeatureMask(raw_ostream &OS,
 
 /// Emit some information about the SubtargetFeature as calls to a macro so
 /// that they can be used from C++.
-void SubtargetEmitter::EmitSubtargetInfoMacroCalls(raw_ostream &OS) {
+void SubtargetEmitter::emitSubtargetInfoMacroCalls(raw_ostream &OS) {
   OS << "\n#ifdef GET_SUBTARGETINFO_MACRO\n";
 
   std::vector<const Record *> FeatureList =
@@ -252,7 +252,7 @@ void SubtargetEmitter::EmitSubtargetInfoMacroCalls(raw_ostream &OS) {
 // FeatureKeyValues - Emit data of all the subtarget features.  Used by the
 // command line.
 //
-unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS,
+unsigned SubtargetEmitter::featureKeyValues(raw_ostream &OS,
                                             const FeatureMapTy &FeatureMap) {
   std::vector<const Record *> FeatureList =
       Records.getAllDerivedDefinitions("SubtargetFeature");
@@ -301,7 +301,7 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS,
 // CPUKeyValues - Emit data of all the subtarget processors.  Used by command
 // line.
 //
-unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS,
+unsigned SubtargetEmitter::cpuKeyValues(raw_ostream &OS,
                                         const FeatureMapTy &FeatureMap) {
   // Gather and sort processor information
   std::vector<const Record *> ProcessorList =
@@ -349,7 +349,7 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS,
 // data initialization for the specified itinerary.  N is the number
 // of stages.
 //
-void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
+void SubtargetEmitter::formItineraryStageString(const std::string &Name,
                                                 const Record *ItinData,
                                                 std::string &ItinString,
                                                 unsigned &NStages) {
@@ -358,9 +358,9 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
 
   // For each stage
   unsigned N = NStages = StageList.size();
-  for (unsigned i = 0; i < N;) {
+  for (unsigned I = 0; I < N;) {
     // Next stage
-    const Record *Stage = StageList[i];
+    const Record *Stage = StageList[I];
 
     // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc, kind }
     int Cycles = Stage->getValueAsInt("Cycles");
@@ -370,10 +370,10 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
     ConstRecVec UnitList = Stage->getValueAsListOfDefs("Units");
 
     // For each unit
-    for (unsigned j = 0, M = UnitList.size(); j < M;) {
+    for (unsigned J = 0, M = UnitList.size(); J < M;) {
       // Add name and bitwise or
-      ItinString += Name + "FU::" + UnitList[j]->getName().str();
-      if (++j < M)
+      ItinString += Name + "FU::" + UnitList[J]->getName().str();
+      if (++J < M)
         ItinString += " | ";
     }
 
@@ -385,7 +385,7 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
 
     // Close off stage
     ItinString += " }";
-    if (++i < N)
+    if (++I < N)
       ItinString += ", ";
   }
 }
@@ -395,7 +395,7 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
 // operand cycle initialization for the specified itinerary.  N is the
 // number of operands that has cycles specified.
 //
-void SubtargetEmitter::FormItineraryOperandCycleString(
+void SubtargetEmitter::formItineraryOperandCycleString(
     const Record *ItinData, std::string &ItinString, unsigned &NOperandCycles) {
   // Get operand cycle list
   std::vector<int64_t> OperandCycleList =
@@ -411,19 +411,19 @@ void SubtargetEmitter::FormItineraryOperandCycleString(
   }
 }
 
-void SubtargetEmitter::FormItineraryBypassString(const std::string &Name,
+void SubtargetEmitter::formItineraryBypassString(const std::string &Name,
                                                  const Record *ItinData,
                                                  std::string &ItinString,
                                                  unsigned NOperandCycles) {
   ConstRecVec BypassList = ItinData->getValueAsListOfDefs("Bypasses");
   unsigned N = BypassList.size();
-  unsigned i = 0;
+  unsigned I = 0;
   ListSeparator LS;
-  for (; i < N; ++i) {
+  for (; I < N; ++I) {
     ItinString += LS;
-    ItinString += Name + "Bypass::" + BypassList[i]->getName().str();
+    ItinString += Name + "Bypass::" + BypassList[I]->getName().str();
   }
-  for (; i < NOperandCycles; ++i) {
+  for (; I < NOperandCycles; ++I) {
     ItinString += LS;
     ItinString += " 0";
   }
@@ -434,7 +434,7 @@ void SubtargetEmitter::FormItineraryBypassString(const std::string &Name,
 // cycle tables. Create a list of InstrItinerary objects (ProcItinLists) indexed
 // by CodeGenSchedClass::Index.
 //
-void SubtargetEmitter::EmitStageAndOperandCycleData(
+void SubtargetEmitter::emitStageAndOperandCycleData(
     raw_ostream &OS, std::vector<std::vector<InstrItinerary>> &ProcItinLists) {
   // Multiple processor models may share an itinerary record. Emit it once.
   SmallPtrSet<const Record *, 8> ItinsDefSet;
@@ -453,9 +453,9 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(
     OS << "\n// Functional units for \"" << Name << "\"\n"
        << "namespace " << Name << "FU {\n";
 
-    for (unsigned j = 0, FUN = FUs.size(); j < FUN; ++j)
-      OS << "  const InstrStage::FuncUnits " << FUs[j]->getName()
-         << " = 1ULL << " << j << ";\n";
+    for (unsigned J = 0, FUN = FUs.size(); J < FUN; ++J)
+      OS << "  const InstrStage::FuncUnits " << FUs[J]->getName()
+         << " = 1ULL << " << J << ";\n";
 
     OS << "} // end namespace " << Name << "FU\n";
 
@@ -466,8 +466,8 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(
          << "namespace " << Name << "Bypass {\n";
 
       OS << "  const unsigned NoBypass = 0;\n";
-      for (unsigned j = 0, BPN = BPs.size(); j < BPN; ++j)
-        OS << "  const unsigned " << BPs[j]->getName() << " = 1 << " << j
+      for (unsigned J = 0, BPN = BPs.size(); J < BPN; ++J)
+        OS << "  const unsigned " << BPs[J]->getName() << " = 1 << " << J
            << ";\n";
 
       OS << "} // end namespace " << Name << "Bypass\n";
@@ -518,7 +518,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(
       std::string ItinStageString;
       unsigned NStages = 0;
       if (ItinData)
-        FormItineraryStageString(std::string(Name), ItinData, ItinStageString,
+        formItineraryStageString(std::string(Name), ItinData, ItinStageString,
                                  NStages);
 
       // Get string and operand cycle count
@@ -526,10 +526,10 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(
       unsigned NOperandCycles = 0;
       std::string ItinBypassString;
       if (ItinData) {
-        FormItineraryOperandCycleString(ItinData, ItinOperandCycleString,
+        formItineraryOperandCycleString(ItinData, ItinOperandCycleString,
                                         NOperandCycles);
 
-        FormItineraryBypassString(std::string(Name), ItinData, ItinBypassString,
+        formItineraryBypassString(std::string(Name), ItinData, ItinBypassString,
                                   NOperandCycles);
       }
 
@@ -610,7 +610,7 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(
 // Itineraries for each processor. The Itinerary lists are indexed on
 // CodeGenSchedClass::Index.
 //
-void SubtargetEmitter::EmitItineraries(
+void SubtargetEmitter::emitItineraries(
     raw_ostream &OS, std::vector<std::vector<InstrItinerary>> &ProcItinLists) {
   // Multiple processor models may share an itinerary record. Emit it once.
   SmallPtrSet<const Record *, 8> ItinsDefSet;
@@ -642,15 +642,15 @@ void SubtargetEmitter::EmitItineraries(
     OS << ItinsDef->getName() << "[] = {\n";
 
     // For each itinerary class in CodeGenSchedClass::Index order.
-    for (unsigned j = 0, M = ItinList.size(); j < M; ++j) {
-      InstrItinerary &Intinerary = ItinList[j];
+    for (unsigned J = 0, M = ItinList.size(); J < M; ++J) {
+      InstrItinerary &Intinerary = ItinList[J];
 
       // Emit Itinerary in the form of
       // { firstStage, lastStage, firstCycle, lastCycle } // index
       OS << "  { " << Intinerary.NumMicroOps << ", " << Intinerary.FirstStage
          << ", " << Intinerary.LastStage << ", " << Intinerary.FirstOperandCycle
          << ", " << Intinerary.LastOperandCycle << " }"
-         << ", // " << j << " " << SchedModels.getSchedClass(j).Name << "\n";
+         << ", // " << J << " " << SchedModels.getSchedClass(J).Name << "\n";
     }
     // End processor itinerary table
     OS << "  { 0, uint16_t(~0U), uint16_t(~0U), uint16_t(~0U), uint16_t(~0U) }"
@@ -662,7 +662,7 @@ void SubtargetEmitter::EmitItineraries(
 // Emit either the value defined in the TableGen Record, or the default
 // value defined in the C++ header. The Record is null if the processor does not
 // define a model.
-void SubtargetEmitter::EmitProcessorProp(raw_ostream &OS, const Record *R,
+void SubtargetEmitter::emitProcessorProp(raw_ostream &OS, const Record *R,
                                          StringRef Name, char Separator) {
   OS << "  ";
   int V = R ? R->getValueAsInt(Name) : -1;
@@ -673,14 +673,14 @@ void SubtargetEmitter::EmitProcessorProp(raw_ostream &OS, const Record *R,
   OS << '\n';
 }
 
-void SubtargetEmitter::EmitProcessorResourceSubUnits(
+void SubtargetEmitter::emitProcessorResourceSubUnits(
     const CodeGenProcModel &ProcModel, raw_ostream &OS) {
   OS << "\nstatic const unsigned " << ProcModel.ModelName
      << "ProcResourceSubUnits[] = {\n"
      << "  0,  // Invalid\n";
 
-  for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) {
-    const Record *PRDef = ProcModel.ProcResourceDefs[i];
+  for (unsigned I = 0, E = ProcModel.ProcResourceDefs.size(); I < E; ++I) {
+    const Record *PRDef = ProcModel.ProcResourceDefs[I];
     if (!PRDef->isSubClassOf("ProcResGroup"))
       continue;
     for (const Record *RUDef : PRDef->getValueAsListOfDefs("Resources")) {
@@ -695,7 +695,7 @@ void SubtargetEmitter::EmitProcessorResourceSubUnits(
   OS << "};\n";
 }
 
-static void EmitRetireControlUnitInfo(const CodeGenProcModel &ProcModel,
+static void emitRetireControlUnitInfo(const CodeGenProcModel &ProcModel,
                                       raw_ostream &OS) {
   int64_t ReorderBufferSize = 0, MaxRetirePerCycle = 0;
   if (const Record *RCU = ProcModel.RetireControlUnit) {
@@ -709,7 +709,7 @@ static void EmitRetireControlUnitInfo(const CodeGenProcModel &ProcModel,
   OS << MaxRetirePerCycle << ", // MaxRetirePerCycle\n  ";
 }
 
-static void EmitRegisterFileInfo(const CodeGenProcModel &ProcModel,
+static void emitRegisterFileInfo(const CodeGenProcModel &ProcModel,
                                  unsigned NumRegisterFiles,
                                  unsigned NumCostEntries, raw_ostream &OS) {
   if (NumRegisterFiles)
@@ -726,7 +726,7 @@ static void EmitRegisterFileInfo(const CodeGenProcModel &ProcModel,
 }
 
 unsigned
-SubtargetEmitter::EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
+SubtargetEmitter::emitRegisterFileTables(const CodeGenProcModel &ProcModel,
                                          raw_ostream &OS) {
   if (llvm::all_of(ProcModel.RegisterFiles, [](const CodeGenRegisterFile &RF) {
         return RF.hasDefaultCosts();
@@ -778,7 +778,7 @@ SubtargetEmitter::EmitRegisterFileTables(const CodeGenProcModel &ProcModel,
   return CostTblIndex;
 }
 
-void SubtargetEmitter::EmitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel,
+void SubtargetEmitter::emitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel,
                                               raw_ostream &OS) {
   unsigned QueueID = 0;
   if (ProcModel.LoadQueue) {
@@ -798,33 +798,33 @@ void SubtargetEmitter::EmitLoadStoreQueueInfo(const CodeGenProcModel &ProcModel,
   OS << "  " << QueueID << ", // Resource Descriptor for the Store Queue\n";
 }
 
-void SubtargetEmitter::EmitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
+void SubtargetEmitter::emitExtraProcessorInfo(const CodeGenProcModel &ProcModel,
                                               raw_ostream &OS) {
   // Generate a table of register file descriptors (one entry per each user
   // defined register file), and a table of register costs.
-  unsigned NumCostEntries = EmitRegisterFileTables(ProcModel, OS);
+  unsigned NumCostEntries = emitRegisterFileTables(ProcModel, OS);
 
   // Now generate a table for the extra processor info.
   OS << "\nstatic const llvm::MCExtraProcessorInfo " << ProcModel.ModelName
      << "ExtraInfo = {\n  ";
 
   // Add information related to the retire control unit.
-  EmitRetireControlUnitInfo(ProcModel, OS);
+  emitRetireControlUnitInfo(ProcModel, OS);
 
   // Add information related to the register files (i.e. where to find register
   // file descriptors and register costs).
-  EmitRegisterFileInfo(ProcModel, ProcModel.RegisterFiles.size(),
+  emitRegisterFileInfo(ProcModel, ProcModel.RegisterFiles.size(),
                        NumCostEntries, OS);
 
   // Add information about load/store queues.
-  EmitLoadStoreQueueInfo(ProcModel, OS);
+  emitLoadStoreQueueInfo(ProcModel, OS);
 
   OS << "};\n";
 }
 
-void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
+void SubtargetEmitter::emitProcessorResources(const CodeGenProcModel &ProcModel,
                                               raw_ostream &OS) {
-  EmitProcessorResourceSubUnits(ProcModel, OS);
+  emitProcessorResourceSubUnits(ProcModel, OS);
 
   OS << "\n// {Name, NumUnits, SuperIdx, BufferSize, SubUnitsIdxBegin}\n";
   OS << "static const llvm::MCProcResourceDesc " << ProcModel.ModelName
@@ -833,8 +833,8 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
      << "  {\"InvalidUnit\", 0, 0, 0, 0},\n";
 
   unsigned SubUnitsOffset = 1;
-  for (unsigned i = 0, e = ProcModel.ProcResourceDefs.size(); i < e; ++i) {
-    const Record *PRDef = ProcModel.ProcResourceDefs[i];
+  for (unsigned I = 0, E = ProcModel.ProcResourceDefs.size(); I < E; ++I) {
+    const Record *PRDef = ProcModel.ProcResourceDefs[I];
 
     const Record *SuperDef = nullptr;
     unsigned SuperIdx = 0;
@@ -866,7 +866,7 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
     } else {
       OS << "nullptr";
     }
-    OS << "}, // #" << i + 1;
+    OS << "}, // #" << I + 1;
     if (SuperDef)
       OS << ", Super=" << SuperDef->getName();
     OS << "\n";
@@ -877,7 +877,7 @@ void SubtargetEmitter::EmitProcessorResources(const CodeGenProcModel &ProcModel,
 // Find the WriteRes Record that defines processor resources for this
 // SchedWrite.
 const Record *
-SubtargetEmitter::FindWriteResources(const CodeGenSchedRW &SchedWrite,
+SubtargetEmitter::findWriteResources(const CodeGenSchedRW &SchedWrite,
                                      const CodeGenProcModel &ProcModel) {
 
   // Check if the SchedWrite is already subtarget-specific and directly
@@ -938,7 +938,7 @@ SubtargetEmitter::FindWriteResources(const CodeGenSchedRW &SchedWrite,
 /// Find the ReadAdvance record for the given SchedRead on this processor or
 /// return NULL.
 const Record *
-SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead,
+SubtargetEmitter::findReadAdvance(const CodeGenSchedRW &SchedRead,
                                   const CodeGenProcModel &ProcModel) {
   // Check for SchedReads that directly specify a ReadAdvance.
   if (SchedRead.TheDef->isSubClassOf("SchedReadAdvance"))
@@ -997,12 +997,12 @@ SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead,
 
 // Expand an explicit list of processor resources into a full list of implied
 // resource groups and super resources that cover them.
-void SubtargetEmitter::ExpandProcResources(
+void SubtargetEmitter::expandProcResources(
     ConstRecVec &PRVec, std::vector<int64_t> &ReleaseAtCycles,
     std::vector<int64_t> &AcquireAtCycles, const CodeGenProcModel &PM) {
   assert(PRVec.size() == ReleaseAtCycles.size() && "failed precondition");
-  for (unsigned i = 0, e = PRVec.size(); i != e; ++i) {
-    const Record *PRDef = PRVec[i];
+  for (unsigned I = 0, E = PRVec.size(); I != E; ++I) {
+    const Record *PRDef = PRVec[I];
     ConstRecVec SubResources;
     if (PRDef->isSubClassOf("ProcResGroup"))
       SubResources = PRDef->getValueAsListOfDefs("Resources");
@@ -1019,8 +1019,8 @@ void SubtargetEmitter::ExpandProcResources(
         const Record *SuperDef = SchedModels.findProcResUnits(
             SubDef->getValueAsDef("Super"), PM, SubDef->getLoc());
         PRVec.push_back(SuperDef);
-        ReleaseAtCycles.push_back(ReleaseAtCycles[i]);
-        AcquireAtCycles.push_back(AcquireAtCycles[i]);
+        ReleaseAtCycles.push_back(ReleaseAtCycles[I]);
+        AcquireAtCycles.push_back(AcquireAtCycles[I]);
         SubDef = SuperDef;
       }
     }
@@ -1036,8 +1036,8 @@ void SubtargetEmitter::ExpandProcResources(
       }
       if (SubI == SubE) {
         PRVec.push_back(PR);
-        ReleaseAtCycles.push_back(ReleaseAtCycles[i]);
-        AcquireAtCycles.push_back(AcquireAtCycles[i]);
+        ReleaseAtCycles.push_back(ReleaseAtCycles[I]);
+        AcquireAtCycles.push_back(AcquireAtCycles[I]);
       }
     }
   }
@@ -1045,7 +1045,7 @@ void SubtargetEmitter::ExpandProcResources(
 
 // Generate the SchedClass table for this processor and update global
 // tables. Must be called for each processor in order.
-void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
+void SubtargetEmitter::genSchedClassTables(const CodeGenProcModel &ProcModel,
                                            SchedClassTables &SchedTables) {
   std::vector<MCSchedClassDesc> &SCTab =
       SchedTables.ProcSchedClasses.emplace_back();
@@ -1147,7 +1147,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
 
       for (unsigned WS : WriteSeq) {
         const Record *WriteRes =
-            FindWriteResources(SchedModels.getSchedWrite(WS), ProcModel);
+            findWriteResources(SchedModels.getSchedWrite(WS), ProcModel);
 
         // Mark the parent class as invalid for unsupported write types.
         if (WriteRes->getValueAsBit("Unsupported")) {
@@ -1209,7 +1209,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
 
         assert(AcquireAtCycles.size() == ReleaseAtCycles.size());
 
-        ExpandProcResources(PRVec, ReleaseAtCycles, AcquireAtCycles, ProcModel);
+        expandProcResources(PRVec, ReleaseAtCycles, AcquireAtCycles, ProcModel);
         assert(AcquireAtCycles.size() == ReleaseAtCycles.size());
 
         for (unsigned PRIdx = 0, PREnd = PRVec.size(); PRIdx != PREnd;
@@ -1263,7 +1263,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
     for (unsigned UseIdx = 0, EndIdx = Reads.size(); UseIdx != EndIdx;
          ++UseIdx) {
       const Record *ReadAdvance =
-          FindReadAdvance(SchedModels.getSchedRead(Reads[UseIdx]), ProcModel);
+          findReadAdvance(SchedModels.getSchedRead(Reads[UseIdx]), ProcModel);
       if (!ReadAdvance)
         continue;
 
@@ -1323,12 +1323,12 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
         SchedTables.WriteLatencies.begin(), SchedTables.WriteLatencies.end(),
         WriteLatencies.begin(), WriteLatencies.end());
     if (WLPos != SchedTables.WriteLatencies.end()) {
-      unsigned idx = WLPos - SchedTables.WriteLatencies.begin();
-      SCDesc.WriteLatencyIdx = idx;
-      for (unsigned i = 0, e = WriteLatencies.size(); i < e; ++i)
-        if (SchedTables.WriterNames[idx + i].find(WriterNames[i]) ==
+      unsigned Idx = WLPos - SchedTables.WriteLatencies.begin();
+      SCDesc.WriteLatencyIdx = Idx;
+      for (unsigned I = 0, E = WriteLatencies.size(); I < E; ++I)
+        if (SchedTables.WriterNames[Idx + I].find(WriterNames[I]) ==
             std::string::npos) {
-          SchedTables.WriterNames[idx + i] += std::string("_") + WriterNames[i];
+          SchedTables.WriterNames[Idx + I] += std::string("_") + WriterNames[I];
         }
     } else {
       SCDesc.WriteLatencyIdx = SchedTables.WriteLatencies.size();
@@ -1351,7 +1351,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
 }
 
 // Emit SchedClass tables for all processors and associated global tables.
-void SubtargetEmitter::EmitSchedClassTables(SchedClassTables &SchedTables,
+void SubtargetEmitter::emitSchedClassTables(SchedClassTables &SchedTables,
                                             raw_ostream &OS) {
   // Emit global WriteProcResTable.
   OS << "\n// {ProcResourceIdx, ReleaseAtCycle, AcquireAtCycle}\n"
@@ -1446,15 +1446,15 @@ void SubtargetEmitter::EmitSchedClassTables(SchedClassTables &SchedTables,
   }
 }
 
-void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
+void SubtargetEmitter::emitProcessorModels(raw_ostream &OS) {
   // For each processor model.
   for (const CodeGenProcModel &PM : SchedModels.procModels()) {
     // Emit extra processor info if available.
     if (PM.hasExtraProcessorInfo())
-      EmitExtraProcessorInfo(PM, OS);
+      emitExtraProcessorInfo(PM, OS);
     // Emit processor resource table.
     if (PM.hasInstrSchedModel())
-      EmitProcessorResources(PM, OS);
+      emitProcessorResources(PM, OS);
     else if (!PM.ProcResourceDefs.empty())
       PrintFatalError(PM.ModelDef->getLoc(),
                       "SchedMachineModel defines "
@@ -1463,12 +1463,12 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
     // Begin processor itinerary properties
     OS << "\n";
     OS << "static const llvm::MCSchedModel " << PM.ModelName << " = {\n";
-    EmitProcessorProp(OS, PM.ModelDef, "IssueWidth", ',');
-    EmitProcessorProp(OS, PM.ModelDef, "MicroOpBufferSize", ',');
-    EmitProcessorProp(OS, PM.ModelDef, "LoopMicroOpBufferSize", ',');
-    EmitProcessorProp(OS, PM.ModelDef, "LoadLatency", ',');
-    EmitProcessorProp(OS, PM.ModelDef, "HighLatency", ',');
-    EmitProcessorProp(OS, PM.ModelDef, "MispredictPenalty", ',');
+    emitProcessorProp(OS, PM.ModelDef, "IssueWidth", ',');
+    emitProcessorProp(OS, PM.ModelDef, "MicroOpBufferSize", ',');
+    emitProcessorProp(OS, PM.ModelDef, "LoopMicroOpBufferSize", ',');
+    emitProcessorProp(OS, PM.ModelDef, "LoadLatency", ',');
+    emitProcessorProp(OS, PM.ModelDef, "HighLatency", ',');
+    emitProcessorProp(OS, PM.ModelDef, "MispredictPenalty", ',');
 
     bool PostRAScheduler =
         (PM.ModelDef ? PM.ModelDef->getValueAsBit("PostRAScheduler") : false);
@@ -1516,7 +1516,7 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
 //
 // EmitSchedModel - Emits all scheduling model tables, folding common patterns.
 //
-void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) {
+void SubtargetEmitter::emitSchedModel(raw_ostream &OS) {
   OS << "#ifdef DBGFIELD\n"
      << "#error \"<target>GenSubtargetInfo.inc requires a DBGFIELD macro\"\n"
      << "#endif\n"
@@ -1529,22 +1529,22 @@ void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) {
   if (SchedModels.hasItineraries()) {
     std::vector<std::vector<InstrItinerary>> ProcItinLists;
     // Emit the stage data
-    EmitStageAndOperandCycleData(OS, ProcItinLists);
-    EmitItineraries(OS, ProcItinLists);
+    emitStageAndOperandCycleData(OS, ProcItinLists);
+    emitItineraries(OS, ProcItinLists);
   }
   OS << "\n// ===============================================================\n"
      << "// Data tables for the new per-operand machine model.\n";
 
   SchedClassTables SchedTables;
   for (const CodeGenProcModel &ProcModel : SchedModels.procModels()) {
-    GenSchedClassTables(ProcModel, SchedTables);
+    genSchedClassTables(ProcModel, SchedTables);
   }
-  EmitSchedClassTables(SchedTables, OS);
+  emitSchedClassTables(SchedTables, OS);
 
   OS << "\n#undef DBGFIELD\n";
 
   // Emit the processor machine model
-  EmitProcessorModels(OS);
+  emitProcessorModels(OS);
 }
 
 static void emitPredicateProlog(const RecordKeeper &Records, raw_ostream &OS) {
@@ -1756,7 +1756,7 @@ void SubtargetEmitter::emitSchedModelHelpersImpl(
   emitSchedModelHelperEpilogue(OS, OnlyExpandMCInstPredicates);
 }
 
-void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
+void SubtargetEmitter::emitSchedModelHelpers(const std::string &ClassName,
                                              raw_ostream &OS) {
   OS << "unsigned " << ClassName
      << "\n::resolveSchedClass(unsigned SchedClass, const MachineInstr *MI,"
@@ -1786,7 +1786,7 @@ void SubtargetEmitter::EmitSchedModelHelpers(const std::string &ClassName,
     PE.expandSTIPredicate(OS, Fn);
 }
 
-void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
+void SubtargetEmitter::emitHwModeCheck(const std::string &ClassName,
                                        raw_ostream &OS) {
   const CodeGenHwModes &CGH = TGT.getHwModes();
   assert(CGH.getNumModeIds() > 0);
@@ -1825,7 +1825,7 @@ void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
   OS << "  return Modes;\n}\n";
   // End emitting for getHwModeSet().
 
-  auto handlePerMode = [&](std::string ModeType, unsigned ModeInBitSet) {
+  auto HandlePerMode = [&](std::string ModeType, unsigned ModeInBitSet) {
     OS << "  case HwMode_" << ModeType << ":\n"
        << "    Modes &= " << ModeInBitSet << ";\n"
        << "    if (!Modes)\n      return Modes;\n"
@@ -1842,9 +1842,9 @@ void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName,
   OS << "  if (!Modes)\n    return Modes;\n\n";
   OS << "  switch (type) {\n";
   OS << "  case HwMode_Default:\n    return llvm::countr_zero(Modes) + 1;\n";
-  handlePerMode("ValueType", ValueTypeModes);
-  handlePerMode("RegInfo", RegInfoModes);
-  handlePerMode("EncodingInfo", EncodingInfoModes);
+  HandlePerMode("ValueType", ValueTypeModes);
+  HandlePerMode("RegInfo", RegInfoModes);
+  HandlePerMode("EncodingInfo", EncodingInfoModes);
   OS << "  }\n";
   OS << "  llvm_unreachable(\"unexpected HwModeType\");\n"
      << "  return 0; // should not get here\n}\n";
@@ -1871,7 +1871,7 @@ void SubtargetEmitter::emitGetMacroFusions(const std::string &ClassName,
 
 // Produces a subtarget specific function for parsing
 // the subtarget features string.
-void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
+void SubtargetEmitter::parseFeaturesFunction(raw_ostream &OS) {
   ArrayRef<const Record *> Features =
       Records.getAllDerivedDefinitions("SubtargetFeature");
 
@@ -1951,10 +1951,10 @@ void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) {
        << "    return MCSubtargetInfo::isCPUStringValid(CPU);\n"
        << "  }\n";
   OS << "};\n";
-  EmitHwModeCheck(Target + "GenMCSubtargetInfo", OS);
+  emitHwModeCheck(Target + "GenMCSubtargetInfo", OS);
 }
 
-void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) {
+void SubtargetEmitter::emitMcInstrAnalysisPredicateFunctions(raw_ostream &OS) {
   OS << "\n#ifdef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n";
   OS << "#undef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n";
 
@@ -1988,18 +1988,18 @@ void SubtargetEmitter::run(raw_ostream &OS) {
   OS << "#undef GET_SUBTARGETINFO_ENUM\n\n";
 
   OS << "namespace llvm {\n";
-  auto FeatureMap = Enumeration(OS);
+  auto FeatureMap = enumeration(OS);
   OS << "} // end namespace llvm\n\n";
   OS << "#endif // GET_SUBTARGETINFO_ENUM\n\n";
 
-  EmitSubtargetInfoMacroCalls(OS);
+  emitSubtargetInfoMacroCalls(OS);
 
   OS << "namespace llvm {\n";
-  unsigned NumFeatures = FeatureKeyValues(OS, FeatureMap);
+  unsigned NumFeatures = featureKeyValues(OS, FeatureMap);
   OS << "\n";
-  EmitSchedModel(OS);
+  emitSchedModel(OS);
   OS << "\n";
-  unsigned NumProcs = CPUKeyValues(OS, FeatureMap);
+  unsigned NumProcs = cpuKeyValues(OS, FeatureMap);
   OS << "\n";
 
   // MCInstrInfo initialization routine.
@@ -2045,7 +2045,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
   OS << "#include \"llvm/Support/raw_ostream.h\"\n\n";
   if (Target == "AArch64")
     OS << "#include \"llvm/TargetParser/AArch64TargetParser.h\"\n\n";
-  ParseFeaturesFunction(OS);
+  parseFeaturesFunction(OS);
 
   OS << "#endif // GET_SUBTARGETINFO_TARGET_DESC\n\n";
 
@@ -2140,15 +2140,15 @@ void SubtargetEmitter::run(raw_ostream &OS) {
     OS << "nullptr, nullptr, nullptr";
   OS << ") {}\n\n";
 
-  EmitSchedModelHelpers(ClassName, OS);
-  EmitHwModeCheck(ClassName, OS);
+  emitSchedModelHelpers(ClassName, OS);
+  emitHwModeCheck(ClassName, OS);
   emitGetMacroFusions(ClassName, OS);
 
   OS << "} // end namespace llvm\n\n";
 
   OS << "#endif // GET_SUBTARGETINFO_CTOR\n\n";
 
-  EmitMCInstrAnalysisPredicateFunctions(OS);
+  emitMcInstrAnalysisPredicateFunctions(OS);
 }
 
 static TableGen::Emitter::OptClass<SubtargetEmitter>
diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp
index fff4c6b7c27ada1..bea2a2e735dbe27 100644
--- a/llvm/utils/TableGen/TableGen.cpp
+++ b/llvm/utils/TableGen/TableGen.cpp
@@ -39,17 +39,17 @@ static cl::opt<std::string> Class("class",
                                   cl::value_desc("class name"),
                                   cl::cat(PrintEnumsCat));
 
-static void PrintRecords(const RecordKeeper &Records, raw_ostream &OS) {
+static void printRecords(const RecordKeeper &Records, raw_ostream &OS) {
   OS << Records; // No argument, dump all contents
 }
 
-static void PrintEnums(const RecordKeeper &Records, raw_ostream &OS) {
+static void printEnums(const RecordKeeper &Records, raw_ostream &OS) {
   for (const Record *Rec : Records.getAllDerivedDefinitions(Class))
     OS << Rec->getName() << ", ";
   OS << "\n";
 }
 
-static void PrintSets(const RecordKeeper &Records, raw_ostream &OS) {
+static void printSets(const RecordKeeper &Records, raw_ostream &OS) {
   SetTheory Sets;
   Sets.addFieldExpander("Set", "Elements");
   for (const Record *Rec : Records.getAllDerivedDefinitions("Set")) {
@@ -63,15 +63,15 @@ static void PrintSets(const RecordKeeper &Records, raw_ostream &OS) {
 }
 
 static TableGen::Emitter::Opt X[] = {
-    {"print-records", PrintRecords, "Print all records to stdout (default)",
+    {"print-records", printRecords, "Print all records to stdout (default)",
      true},
     {"print-detailed-records", EmitDetailedRecords,
      "Print full details of all records to stdout"},
     {"null-backend", [](const RecordKeeper &Records, raw_ostream &OS) {},
      "Do nothing after parsing (useful for timing)"},
     {"dump-json", EmitJSON, "Dump all records as machine-readable JSON"},
-    {"print-enums", PrintEnums, "Print enum values for a class"},
-    {"print-sets", PrintSets, "Print expanded sets for testing DAG exprs"},
+    {"print-enums", printEnums, "Print enum values for a class"},
+    {"print-sets", printSets, "Print expanded sets for testing DAG exprs"},
 };
 
 int main(int argc, char **argv) {
diff --git a/llvm/utils/TableGen/VTEmitter.cpp b/llvm/utils/TableGen/VTEmitter.cpp
index 8f4bcd5fccc73d0..d02932dd5e7fca5 100644
--- a/llvm/utils/TableGen/VTEmitter.cpp
+++ b/llvm/utils/TableGen/VTEmitter.cpp
@@ -10,7 +10,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
-#include <array>
 #include <cassert>
 #include <map>
 using namespace llvm;
@@ -29,7 +28,7 @@ class VTEmitter {
 
 } // End anonymous namespace.
 
-static void VTtoGetLLVMTyString(raw_ostream &OS, const Record *VT) {
+static void vTtoGetLlvmTyString(raw_ostream &OS, const Record *VT) {
   bool IsVector = VT->getValueAsBit("isVector");
   bool IsRISCVVecTuple = VT->getValueAsBit("isRISCVVecTuple");
 
@@ -208,7 +207,7 @@ void VTEmitter::run(raw_ostream &OS) {
       continue;
 
     OS << "  GET_VT_EVT(" << VT->getValueAsString("LLVMName") << ", ";
-    VTtoGetLLVMTyString(OS, VT);
+    vTtoGetLlvmTyString(OS, VT);
     OS << ")\n";
   }
   OS << "#endif\n\n";
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index 0fbb73431d2cfb8..cdfa8978566fb4d 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -275,8 +275,10 @@ def __init__(
         self.run_lines = find_run_lines(test, self.input_lines)
         self.comment_prefix = comment_prefix
         if self.comment_prefix is None:
-            if self.path.endswith(".mir"):
+            if self.path.endswith(".mir") or self.path.endswith(".txt"):
                 self.comment_prefix = "#"
+            elif self.path.endswith(".s"):
+                self.comment_prefix = "//"
             else:
                 self.comment_prefix = ";"
         self.autogenerated_note_prefix = self.comment_prefix + " " + UTC_ADVERT
diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
index f23bccf9d511856..61e4f8da3c04def 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/bugprone/BUILD.gn
@@ -56,6 +56,7 @@ static_library("bugprone") {
     "MultipleStatementMacroCheck.cpp",
     "NoEscapeCheck.cpp",
     "NonZeroEnumToBoolConversionCheck.cpp",
+    "NondeterministicPointerIterationOrderCheck.cpp",
     "NotNullTerminatedResultCheck.cpp",
     "OptionalValueConversionCheck.cpp",
     "ParentVirtualCallCheck.cpp",
diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
index 1bbec962ff68d5d..b47189accd13651 100644
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -245,6 +245,8 @@ copy("Headers") {
     "mmintrin.h",
     "module.modulemap",
     "movdirintrin.h",
+    "movrs_avx10_2_512intrin.h",
+    "movrs_avx10_2intrin.h",
     "msa.h",
     "mwaitxintrin.h",
     "nmmintrin.h",
@@ -291,6 +293,7 @@ copy("Headers") {
     "shaintrin.h",
     "sifive_vector.h",
     "sm3intrin.h",
+    "sm4evexintrin.h",
     "sm4intrin.h",
     "smmintrin.h",
     "stdalign.h",
diff --git a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn
index 7a6c360e88c14e8..fe211726eb5d59d 100644
--- a/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/StaticAnalyzer/Checkers/BUILD.gn
@@ -99,8 +99,6 @@ static_library("Checkers") {
     "ObjCUnusedIVarsChecker.cpp",
     "PaddingChecker.cpp",
     "PointerArithChecker.cpp",
-    "PointerIterationChecker.cpp",
-    "PointerSortingChecker.cpp",
     "PointerSubChecker.cpp",
     "PthreadLockChecker.cpp",
     "PutenvStackArrayChecker.cpp",
@@ -141,8 +139,8 @@ static_library("Checkers") {
     "VforkChecker.cpp",
     "VirtualCallChecker.cpp",
     "WebKit/ASTUtils.cpp",
-    "WebKit/RawPtrRefMemberChecker.cpp",
     "WebKit/PtrTypesSemantics.cpp",
+    "WebKit/RawPtrRefMemberChecker.cpp",
     "WebKit/RefCntblBaseVirtualDtorChecker.cpp",
     "WebKit/UncountedCallArgsChecker.cpp",
     "WebKit/UncountedLambdaCapturesChecker.cpp",
diff --git a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn
index 7194c47b1becc48..5fbda794ff176ef 100644
--- a/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/test/BUILD.gn
@@ -55,7 +55,6 @@ write_cmake_config("lit_common_configured") {
     "COMPILER_RT_ENABLE_INTERNAL_SYMBOLIZER_PYBOOL=False",
     "COMPILER_RT_HAS_NO_DEFAULT_CONFIG_FLAG_PYBOOL=True",
     "COMPILER_RT_INTERCEPT_LIBDISPATCH_PYBOOL=False",
-    "COMPILER_RT_RESOLVED_EXEC_OUTPUT_DIR=" + rebase_path(crt_current_out_dir),
     "COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR=" +
         rebase_path(crt_current_out_dir),
     "COMPILER_RT_RESOLVED_OUTPUT_DIR=" + rebase_path(crt_current_out_dir),
diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index 0a2552f4b0616e4..776f1d32c5f520f 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -430,6 +430,8 @@ if (current_toolchain == default_toolchain) {
       "__filesystem/recursive_directory_iterator.h",
       "__filesystem/space_info.h",
       "__filesystem/u8path.h",
+      "__flat_map/flat_map.h",
+      "__flat_map/sorted_unique.h",
       "__format/buffer.h",
       "__format/concepts.h",
       "__format/container_adaptor.h",
@@ -564,15 +566,16 @@ if (current_toolchain == default_toolchain) {
       "__locale",
       "__locale_dir/locale_base_api.h",
       "__locale_dir/locale_base_api/android.h",
+      "__locale_dir/locale_base_api/apple.h",
       "__locale_dir/locale_base_api/bsd_locale_defaults.h",
       "__locale_dir/locale_base_api/bsd_locale_fallbacks.h",
+      "__locale_dir/locale_base_api/freebsd.h",
       "__locale_dir/locale_base_api/fuchsia.h",
       "__locale_dir/locale_base_api/ibm.h",
-      "__locale_dir/locale_base_api/locale_guard.h",
       "__locale_dir/locale_base_api/musl.h",
-      "__locale_dir/locale_base_api/newlib.h",
       "__locale_dir/locale_base_api/openbsd.h",
       "__locale_dir/locale_base_api/win32.h",
+      "__locale_dir/locale_guard.h",
       "__math/abs.h",
       "__math/copysign.h",
       "__math/error_functions.h",
@@ -954,6 +957,14 @@ if (current_toolchain == default_toolchain) {
       "__utility/to_underlying.h",
       "__utility/unreachable.h",
       "__variant/monostate.h",
+      "__vector/comparison.h",
+      "__vector/container_traits.h",
+      "__vector/erase.h",
+      "__vector/pmr.h",
+      "__vector/swap.h",
+      "__vector/vector.h",
+      "__vector/vector_bool.h",
+      "__vector/vector_bool_formatter.h",
       "__verbose_abort",
       "algorithm",
       "any",
@@ -984,6 +995,7 @@ if (current_toolchain == default_toolchain) {
       "coroutine",
       "csetjmp",
       "csignal",
+      "cstdalign",
       "cstdarg",
       "cstdbool",
       "cstddef",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
index 64b03b57388cb2d..d152aec19d1b587 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn
@@ -152,6 +152,7 @@ static_library("Support") {
     "TimeProfiler.cpp",
     "Timer.cpp",
     "ToolOutputFile.cpp",
+    "TrieRawHashMap.cpp",
     "Twine.cpp",
     "TypeSize.cpp",
     "Unicode.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn
index c27faaaecf30d16..07ed3b4718af50e 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/ADT/BUILD.gn
@@ -94,6 +94,7 @@ unittest("ADTTests") {
     "StringSetTest.cpp",
     "StringSwitchTest.cpp",
     "TinyPtrVectorTest.cpp",
+    "TrieRawHashMapTest.cpp",
     "TwineTest.cpp",
     "TypeSwitchTest.cpp",
     "TypeTraitsTest.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn
index 6f52677cb83338e..7f74b335e30ed99 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn
@@ -17,8 +17,9 @@ unittest("JITLinkTests") {
     "AArch32ErrorTests.cpp",
     "AArch32Tests.cpp",
     "EHFrameSupportTests.cpp",
-    "JITLinkMocks.cpp",
+    "JITLinkTestUtils.cpp",
     "LinkGraphTests.cpp",
+    "MachOLinkGraphTests.cpp",
     "MemoryManagerErrorTests.cpp",
     "StubsTests.cpp",
   ]
diff --git a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn
index 44640c6527c903b..97df71c6279efde 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/BUILD.gn
@@ -14,5 +14,6 @@ unittest("SandboxVectorizerTests") {
     "IntervalTest.cpp",
     "LegalityTest.cpp",
     "SchedulerTest.cpp",
+    "VecUtilsTest.cpp",
   ]
 }
diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py
index 1ef5796cd32e448..5f762ec7f3514ab 100644
--- a/llvm/utils/lit/lit/llvm/config.py
+++ b/llvm/utils/lit/lit/llvm/config.py
@@ -57,13 +57,6 @@ def __init__(self, lit_config, config):
                 self.lit_config.note("using lit tools: {}".format(path))
                 lit_path_displayed = True
 
-        if platform.system() == "AIX":
-            # Diff on AIX doesn't have all the required features (see
-            # https://github.com/llvm/llvm-project/pull/108871 and
-            # https://github.com/llvm/llvm-project/pull/112997#issuecomment-2429656192)
-            # so always use the internal shell.
-            self.use_lit_shell = True
-
         if platform.system() == "OS/390":
             self.with_environment("_BPXK_AUTOCVT", "ON")
             self.with_environment("_TAG_REDIR_IN", "TXT")
diff --git a/llvm/utils/update_mc_test_checks.py b/llvm/utils/update_mc_test_checks.py
index f9f8cfdea418d01..55ed6c82d4877e2 100755
--- a/llvm/utils/update_mc_test_checks.py
+++ b/llvm/utils/update_mc_test_checks.py
@@ -6,6 +6,7 @@
 from __future__ import print_function
 
 import argparse
+import functools
 import os  # Used to advertise this file's name ("autogenerated_note").
 
 from UpdateTestChecks import common
@@ -50,6 +51,10 @@ def isTestLine(input_line, mc_mode):
     return True
 
 
+def isRunLine(l):
+    return common.RUN_LINE_RE.match(l)
+
+
 def hasErr(err):
     return err and ERROR_RE.search(err) is not None
 
@@ -118,6 +123,19 @@ def main():
         default=None,
         help="Set a default -march for when neither triple nor arch are found in a RUN line",
     )
+    parser.add_argument(
+        "--unique",
+        action="store_true",
+        default=False,
+        help="remove duplicated test line if found",
+    )
+    parser.add_argument(
+        "--sort",
+        action="store_true",
+        default=False,
+        help="sort testline in alphabetic order (keep run-lines on top), this option could be dangerous as it "
+        "could change the order of lines that are not expected",
+    )
     parser.add_argument("tests", nargs="+")
     initial_args = common.parse_commandline_args(parser)
 
@@ -130,6 +148,11 @@ def main():
             mc_mode = "asm"
         elif ti.path.endswith(".txt"):
             mc_mode = "dasm"
+
+            if ti.args.sort:
+                print("sorting with dasm(.txt) file is not supported!")
+                return -1
+
         else:
             common.warn("Expected .s and .txt, Skipping file : ", ti.path)
             continue
@@ -196,6 +219,10 @@ def main():
 
         # find all test line from input
         testlines = [l for l in ti.input_lines if isTestLine(l, mc_mode)]
+        # remove duplicated lines to save running time
+        testlines = list(dict.fromkeys(testlines))
+        common.debug("Valid test line found: ", len(testlines))
+
         run_list_size = len(run_list)
         testnum = len(testlines)
 
@@ -233,7 +260,7 @@ def main():
             raw_prefixes.append(prefixes)
 
         output_lines = []
-        generated_prefixes = []
+        generated_prefixes = {}
         used_prefixes = set()
         prefix_set = set([prefix for p in run_list for prefix in p[0]])
         common.debug("Rewriting FileCheck prefixes:", str(prefix_set))
@@ -298,23 +325,72 @@ def main():
                     else:
                         gen_prefix += getStdCheckLine(prefix, o, mc_mode)
 
-            generated_prefixes.append(gen_prefix.rstrip("\n"))
+            generated_prefixes[input_line] = gen_prefix.rstrip("\n")
 
         # write output
-        prefix_id = 0
         for input_info in ti.iterlines(output_lines):
             input_line = input_info.line
-            if isTestLine(input_line, mc_mode):
+            if input_line in testlines:
                 output_lines.append(input_line)
-                output_lines.append(generated_prefixes[prefix_id])
-                prefix_id += 1
+                output_lines.append(generated_prefixes[input_line])
 
             elif should_add_line_to_output(input_line, prefix_set, mc_mode):
                 output_lines.append(input_line)
 
-            elif input_line in ti.run_lines or input_line == "":
-                output_lines.append(input_line)
+        if ti.args.unique or ti.args.sort:
+            # split with double newlines
+            test_units = "\n".join(output_lines).split("\n\n")
+
+            # select the key line for each test unit
+            test_dic = {}
+            for unit in test_units:
+                lines = unit.split("\n")
+                for l in lines:
+                    # if contains multiple lines, use
+                    # the first testline or runline as key
+                    if isTestLine(l, mc_mode):
+                        test_dic[unit] = l
+                        break
+                    if isRunLine(l):
+                        test_dic[unit] = l
+                        break
+
+            # unique
+            if ti.args.unique:
+                new_test_units = []
+                written_lines = set()
+                for unit in test_units:
+                    # if not testline/runline, we just add it
+                    if unit not in test_dic:
+                        new_test_units.append(unit)
+                    else:
+                        if test_dic[unit] in written_lines:
+                            common.debug("Duplicated test skipped: ", unit)
+                            continue
+
+                        written_lines.add(test_dic[unit])
+                        new_test_units.append(unit)
+                test_units = new_test_units
+
+            # sort
+            if ti.args.sort:
+
+                def getkey(l):
+                    # find key of test unit, otherwise use first line
+                    if l in test_dic:
+                        line = test_dic[l]
+                    else:
+                        line = l.split("\n")[0]
+
+                    # runline placed on the top
+                    return (not isRunLine(line), line)
+
+                test_units = sorted(test_units, key=getkey)
+
+            # join back to be output string
+            output_lines = "\n\n".join(test_units).split("\n")
 
+        # output
         if ti.args.gen_unused_prefix_body:
             output_lines.extend(
                 ti.get_checks_for_unused_prefixes(run_list, used_prefixes)
diff --git a/mlir/docs/Bufferization.md b/mlir/docs/Bufferization.md
index d5a426e09e7ceb1..7d38ebb38535c73 100644
--- a/mlir/docs/Bufferization.md
+++ b/mlir/docs/Bufferization.md
@@ -579,7 +579,6 @@ The code, slightly simplified and annotated, is reproduced here:
   // Partial bufferization passes.
   pm.addPass(createTensorConstantBufferizePass());
   pm.addNestedPass<func::FuncOp>(createTCPBufferizePass()); // Bufferizes the downstream `tcp` dialect.
-  pm.addNestedPass<func::FuncOp>(createSCFBufferizePass());
   pm.addNestedPass<func::FuncOp>(createLinalgBufferizePass());
   pm.addNestedPass<func::FuncOp>(createTensorBufferizePass());
   pm.addPass(createFuncBufferizePass());
@@ -596,7 +595,7 @@ must be module passes because they make changes to the top-level module.
 
 The bulk of the bufferization work is done by the function passes. Most of these
 passes are provided as part of the upstream MLIR distribution and bufferize
-their respective dialects (e.g. `scf-bufferize` bufferizes the `scf` dialect).
+their respective dialects (e.g. `abc-bufferize` bufferizes the `abc` dialect).
 The `tcp-bufferize` pass is an exception -- it is a partial bufferization pass
 used to bufferize the downstream `tcp` dialect, and fits in perfectly with all
 the other passes provided upstream.
@@ -694,20 +693,6 @@ which helps with this in general.
 
 ### Other partial bufferization examples
 
--   `scf-bufferize`
-    ([code](https://github.com/llvm/llvm-project/blob/bc8acf2ce8ad6e8c9b1d97b2e02d3f4ad26e1d9d/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp#L1),
-    [test](https://github.com/llvm/llvm-project/blob/bc8acf2ce8ad6e8c9b1d97b2e02d3f4ad26e1d9d/mlir/test/Dialect/SCF/bufferize.mlir#L1))
-
-    -   Bufferizes ops from the `scf` dialect.
-    -   This is an example of how to bufferize ops that implement
-        `RegionBranchOpInterface` (that is, they use regions to represent
-        control flow).
-    -   The bulk of the work is done by
-        `lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp`
-        ([code](https://github.com/llvm/llvm-project/blob/daaaed6bb89044ac58a23f1bb1ccdd12342a5a58/mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp#L1)),
-        which is well-commented and covers how to correctly convert ops that
-        contain regions.
-
 -   `func-bufferize`
     ([code](https://github.com/llvm/llvm-project/blob/2f5715dc78328215d51d5664c72c632a6dac1046/mlir/lib/Dialect/Func/Transforms/FuncBufferize.cpp#L1),
     [test](https://github.com/llvm/llvm-project/blob/2f5715dc78328215d51d5664c72c632a6dac1046/mlir/test/Dialect/Func/func-bufferize.mlir#L1))
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
index d236cae0d80882c..63e007cdc335cc1 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h
@@ -33,6 +33,7 @@
 #include "mlir/Support/ThreadLocalCache.h"
 #include "llvm/ADT/PointerEmbeddedInt.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
@@ -87,6 +88,13 @@ class GEPArg : public PointerUnion<Value, GEPConstantIndex> {
 } // namespace LLVM
 } // namespace mlir
 
+namespace mlir {
+namespace LLVM {
+struct AssumeAlignTag {};
+struct AssumeSeparateStorageTag {};
+} // namespace LLVM
+} // namespace mlir
+
 ///// Ops /////
 #define GET_OP_CLASSES
 #include "mlir/Dialect/LLVMIR/LLVMOps.h.inc"
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index 845c88b1be77509..d07ebbacc604346 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -450,7 +450,14 @@ def LLVM_AssumeOp
   }];
 
   let builders = [
-    OpBuilder<(ins "Value":$cond)>
+    OpBuilder<(ins "Value":$cond)>,
+    OpBuilder<(ins "Value":$cond,
+                   "ArrayRef<llvm::OperandBundleDefT<Value>>":$opBundles)>,
+    OpBuilder<(ins "Value":$cond, "llvm::StringRef":$tag, "ValueRange":$args)>,
+    OpBuilder<(ins "Value":$cond, "AssumeAlignTag":$tag, "Value":$ptr,
+                   "Value":$align)>,
+    OpBuilder<(ins "Value":$cond, "AssumeSeparateStorageTag":$tag,
+                   "Value":$ptr1, "Value":$ptr2)>
   ];
 
   let hasVerifier = 1;
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 5806295cedb198c..7cb4b5c346ad972 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -783,24 +783,27 @@ def NVVM_SyncWarpOp :
   let assemblyFormat = "$mask attr-dict `:` type($mask)";
 }
 
-
-def NVVM_ElectSyncOp : NVVM_Op<"elect.sync", 
-                  [DeclareOpInterfaceMethods<BasicPtxBuilderOpInterface>]>
+def NVVM_ElectSyncOp : NVVM_Op<"elect.sync">
 {  
+  let summary = "Elect one leader thread";
+  let description = [{
+    The `elect.sync` instruction elects one predicated active leader
+    thread from among a set of threads specified in membermask.
+    The membermask is set to `0xFFFFFFFF` for the current version
+    of this Op. The predicate result is set to `True` for the
+    leader thread, and `False` for all other threads.
+
+    [For more information, see PTX ISA]
+    (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-elect-sync)
+  }];
+
   let results = (outs I1:$pred);
   let assemblyFormat = "attr-dict `->` type(results)";  
-  let extraClassDefinition = [{        
-    std::string $cppClass::getPtx() { 
-      return std::string(
-        "{                                  \n"
-        ".reg .u32 rx;                      \n"
-        ".reg .pred px;                     \n"
-        " mov.pred %0, 0;                   \n"
-        "    elect.sync rx | px, 0xFFFFFFFF;\n"
-        "@px mov.pred %0, 1;                \n"
-        "}\n"
-      ); 
-    }
+  string llvmBuilder = [{
+    auto *resultTuple = createIntrinsicCall(builder,
+        llvm::Intrinsic::nvvm_elect_sync, {builder.getInt32(0xFFFFFFFF)});
+    // Extract the second value into $pred
+    $pred = builder.CreateExtractValue(resultTuple, 1);
   }];
 }
 
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
index bfc609bd708164a..c2fee8ea55c960a 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -30,6 +30,7 @@ class LinalgStructuredBase_Op<string mnemonic, list<Trait> props>
        SingleBlockImplicitTerminator<"YieldOp">,
        DeclareOpInterfaceMethods<MemoryEffectsOpInterface>,
        DeclareOpInterfaceMethods<ConditionallySpeculatable>,
+       RecursiveMemoryEffects,
        DestinationStyleOpInterface,
        LinalgStructuredInterface,
        ReifyRankedShapedTypeOpInterface], props)> {
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 040c04b0410ecf5..abf446887c54425 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -84,6 +84,26 @@ def ApplyFoldAddIntoDestPatternsOp : Op<Transform_Dialect,
   let assemblyFormat = "attr-dict";
 }
 
+def ApplyPadVectorizationPatternsOp : Op<Transform_Dialect,
+    "apply_patterns.linalg.pad_vectorization",
+    [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
+  let description = [{
+    Apply patterns that vectorize tensor.pad.
+
+    These patterns rewrite tensor.pad Ops using vector.transfer_read and
+    vector.transfer_write operations. This is done either by:
+      1. Folding tensor.pad with an existing vector.transfer_read /
+      vector.transfer_write Op (generated prior to running these patterns). 
+      2. Rewriting it (when matched together with a tensor.insert_slice
+      consumer Op) as a vector.transfer_read + vector.transfer_write pair.
+
+    In both cases, these patterns look at producers and consumers for the
+    matched tensor.pad Op to find opportunities for vectorization.
+  }];
+
+  let assemblyFormat = "attr-dict";
+}
+
 //===----------------------------------------------------------------------===//
 // BufferizeToAllocationOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index 8d84f01ac271438..9ea5728b697a973 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1503,18 +1503,13 @@ using OptimizeCopyFn =
 
 /// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and
 /// InsertSliceOp. For now, only constant padding values are supported.
-/// `OptimizeCopyFn` can be used to customize copying step optimization.
 struct GeneralizePadOpPattern : public OpRewritePattern<tensor::PadOp> {
-  GeneralizePadOpPattern(MLIRContext *context,
-                         OptimizeCopyFn optimizeCopyFn = nullptr,
-                         PatternBenefit benefit = 1)
-      : OpRewritePattern<tensor::PadOp>(context, benefit),
-        optimizeCopyFn(std::move(optimizeCopyFn)) {}
+  GeneralizePadOpPattern(MLIRContext *context, PatternBenefit benefit = 1)
+      : OpRewritePattern<tensor::PadOp>(context, benefit) {}
   LogicalResult matchAndRewrite(tensor::PadOp padOp,
                                 PatternRewriter &rewriter) const override;
 
 protected:
-  OptimizeCopyFn optimizeCopyFn;
   Value createFillOrGenerateOp(RewriterBase &rewriter, tensor::PadOp padOp,
                                Value dest,
                                const SmallVector<Value> &dynSizes) const;
@@ -1663,6 +1658,11 @@ void populateDecomposeConvolutionPatterns(RewritePatternSet &patterns,
 /// \see rewriteInIm2Col for more details.
 void populateConvertConv2DToImg2ColPatterns(RewritePatternSet &patterns);
 
+/// Populates `patterns` with vectorisation patterns for tensor.insert_slice.
+/// TODO: Avoid having a dedicated `populate{}` for one pattern. Instead, either
+/// expand or merge with other `populate{}`.
+void populateInsertSliceVectorizationPatterns(RewritePatternSet &patterns);
+
 /// Populates `patterns` with patterns that vectorize tensor.pad.
 /// These patterns are meant to apply in a complementary fashion. Benefits
 /// are used to encode a certain ordering of pattern application. To avoid
diff --git a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h
index ca3326dbbef5191..a761a77a407e879 100644
--- a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h
+++ b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h
@@ -32,7 +32,8 @@ namespace memref {
 bool isStaticShapeAndContiguousRowMajor(MemRefType type);
 
 /// For a `memref` with `offset`, `sizes` and `strides`, returns the
-/// offset and size to use for the linearized `memref`.
+/// offset, size, and potentially the size padded at the front to use for the
+/// linearized `memref`.
 /// - If the linearization is done for emulating load/stores of
 ///   element type with bitwidth `srcBits` using element type with
 ///   bitwidth `dstBits`, the linearized offset and size are
@@ -42,9 +43,14 @@ bool isStaticShapeAndContiguousRowMajor(MemRefType type);
 ///   index to use in the linearized `memref`. The linearized index
 ///   is also scaled down by `dstBits`/`srcBits`. If `indices` is not provided
 ///   0, is returned for the linearized index.
+/// - If the size of the load/store is smaller than the linearized memref
+/// load/store, the memory region emulated is larger than the actual memory
+/// region needed. `intraDataOffset` returns the element offset of the data
+/// relevant at the beginning.
 struct LinearizedMemRefInfo {
   OpFoldResult linearizedOffset;
   OpFoldResult linearizedSize;
+  OpFoldResult intraDataOffset;
 };
 std::pair<LinearizedMemRefInfo, OpFoldResult> getLinearizedMemRefOffsetAndSize(
     OpBuilder &builder, Location loc, int srcBits, int dstBits,
diff --git a/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td b/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td
index 20880d94a83cacb..5dba8c5e57ba861 100644
--- a/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td
+++ b/mlir/include/mlir/Dialect/SCF/TransformOps/SCFTransformOps.td
@@ -146,7 +146,7 @@ def LoopPeelOp : Op<Transform_Dialect, "loop.peel",
   let summary = "Peels the first or last iteration of the loop";
   let description = [{
      Rewrite the given loop with a main loop and a partial (first or last) loop.
-     When the `peelFront` option is set as true, the first iteration is peeled off.
+     When the `peelFront` option is set to true, the first iteration is peeled off.
      Otherwise, updates the given loop so that its step evenly divides its range and puts
      the remaining iteration into a separate loop or a conditional.
 
@@ -156,18 +156,20 @@ def LoopPeelOp : Op<Transform_Dialect, "loop.peel",
      #### Return modes
 
      This operation ignores non-scf::ForOp ops and drops them in the return.
-
-     When `peelFront` is true, this operation returns two scf::ForOp Ops, the
-     first scf::ForOp corresponds to the first iteration of the loop which can
-     be canonicalized away in the following optimization. The second loop Op
-     contains the remaining iteration, and the new lower bound is the original
-     lower bound plus the number of steps.
-
-     When `peelFront` is not true, this operation returns two scf::ForOp Ops, with the first
-     scf::ForOp satisfying: "the loop trip count is divisible by the step".
-     The second loop Op contains the remaining iteration. Note that even though the
-     Payload IR modification may be performed in-place, this operation consumes
-     the operand handle and produces a new one.
+     The op returns two loops, the peeled loop which has trip count divisible
+     by the step, and the remainder loop.
+
+     When `peelFront` is true, the first result (remainder loop) executes all
+     but the first iteration of the target loop. The second result (peeled
+     loop) corresponds to the first iteration of the loop which can be
+     canonicalized away in the following optimizations.
+
+     When `peelFront` is false, the first result (peeled loop) is the portion
+     of the target loop with the highest upper bound that is divisible by the
+     step. The second result (remainder loop) contains the remaining iterations. 
+     
+     Note that even though the Payload IR modification may be performed
+     in-place, this operation consumes the operand handle and produces a new one.
 
      #### Return Modes
 
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
index fb8411418ff9a00..b70599df6f5033c 100644
--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.h
@@ -20,9 +20,6 @@ namespace mlir {
 #define GEN_PASS_DECL
 #include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
 
-/// Creates a pass that bufferizes the SCF dialect.
-std::unique_ptr<Pass> createSCFBufferizePass();
-
 /// Creates a pass that specializes for loop for unrolling and
 /// vectorization.
 std::unique_ptr<Pass> createForLoopSpecializationPass();
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
index 53d1ae10dc87d84..6e5ef96c450aa4a 100644
--- a/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Transforms/Passes.td
@@ -11,13 +11,6 @@
 
 include "mlir/Pass/PassBase.td"
 
-def SCFBufferize : Pass<"scf-bufferize"> {
-  let summary = "Bufferize the scf dialect.";
-  let constructor = "mlir::createSCFBufferizePass()";
-  let dependentDialects = ["bufferization::BufferizationDialect",
-                           "memref::MemRefDialect"];
-}
-
 // Note: Making these canonicalization patterns would require a dependency
 // of the SCF dialect on the Affine/Tensor/MemRef dialects or vice versa.
 def SCFForLoopCanonicalization
diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
index 6ccbc40bdd6034a..2e9c297f20182af 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h
@@ -150,7 +150,7 @@ std::unique_ptr<Pass> createLowerForeachToSCFPass();
 //===----------------------------------------------------------------------===//
 
 /// Type converter for iter_space and iterator.
-struct SparseIterationTypeConverter : public OneToNTypeConverter {
+struct SparseIterationTypeConverter : public TypeConverter {
   SparseIterationTypeConverter();
 };
 
diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
index c02b16ea931706d..474f4ccf4891de9 100644
--- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td
@@ -1819,17 +1819,17 @@ def Vector_MaskedLoadOp :
   Vector_Op<"maskedload">,
     Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base,
                Variadic<Index>:$indices,
-               VectorOfRankAndType<[1], [I1]>:$mask,
-               VectorOfRank<[1]>:$pass_thru)>,
-    Results<(outs VectorOfRank<[1]>:$result)> {
+               VectorOf<[I1]>:$mask,
+               AnyVector:$pass_thru)>,
+    Results<(outs AnyVector:$result)> {
 
   let summary = "loads elements from memory into a vector as defined by a mask vector";
 
   let description = [{
-    The masked load reads elements from memory into a 1-D vector as defined
-    by a base with indices and a 1-D mask vector. When the mask is set, the
+    The masked load reads elements from memory into a vector as defined
+    by a base with indices and a mask vector. When the mask is set, the
     element is read from memory. Otherwise, the corresponding element is taken
-    from a 1-D pass-through vector. Informally the semantics are:
+    from a pass-through vector. Informally the semantics are:
     ```
     result[0] := if mask[0] then base[i + 0] else pass_thru[0]
     result[1] := if mask[1] then base[i + 1] else pass_thru[1]
@@ -1882,14 +1882,14 @@ def Vector_MaskedStoreOp :
   Vector_Op<"maskedstore">,
     Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base,
                Variadic<Index>:$indices,
-               VectorOfRankAndType<[1], [I1]>:$mask,
-               VectorOfRank<[1]>:$valueToStore)> {
+               VectorOf<[I1]>:$mask,
+               AnyVector:$valueToStore)> {
 
   let summary = "stores elements from a vector into memory as defined by a mask vector";
 
   let description = [{
-    The masked store operation writes elements from a 1-D vector into memory
-    as defined by a base with indices and a 1-D mask vector. When the mask is
+    The masked store operation writes elements from a vector into memory
+    as defined by a base with indices and a mask vector. When the mask is
     set, the corresponding element from the vector is written to memory. Otherwise,
     no action is taken for the element. Informally the semantics are:
     ```
@@ -2076,23 +2076,26 @@ def Vector_ExpandLoadOp :
   Vector_Op<"expandload">,
     Arguments<(ins Arg<AnyMemRef, "", [MemRead]>:$base,
                Variadic<Index>:$indices,
-               VectorOfRankAndType<[1], [I1]>:$mask,
-               VectorOfRank<[1]>:$pass_thru)>,
-    Results<(outs VectorOfRank<[1]>:$result)> {
+               VectorOf<[I1]>:$mask,
+               AnyVector:$pass_thru)>,
+    Results<(outs AnyVector:$result)> {
 
   let summary = "reads elements from memory and spreads them into a vector as defined by a mask";
 
   let description = [{
-    The expand load reads elements from memory into a 1-D vector as defined
-    by a base with indices and a 1-D mask vector. When the mask is set, the
-    next element is read from memory. Otherwise, the corresponding element
-    is taken from a 1-D pass-through vector. Informally the semantics are:
+    The expand load reads elements from memory into a vector as defined by a
+    base with indices and a mask vector. Expansion only applies to the innermost
+    dimension. When the mask is set, the next element is read from memory.
+    Otherwise, the corresponding element is taken from a pass-through vector.
+    Informally the semantics are:
+
     ```
     index = i
     result[0] := if mask[0] then base[index++] else pass_thru[0]
     result[1] := if mask[1] then base[index++] else pass_thru[1]
     etc.
     ```
+
     Note that the index increment is done conditionally.
 
     If a mask bit is set and the corresponding index is out-of-bounds for the
@@ -2140,22 +2143,25 @@ def Vector_CompressStoreOp :
   Vector_Op<"compressstore">,
     Arguments<(ins Arg<AnyMemRef, "", [MemWrite]>:$base,
                Variadic<Index>:$indices,
-               VectorOfRankAndType<[1], [I1]>:$mask,
-               VectorOfRank<[1]>:$valueToStore)> {
+               VectorOf<[I1]>:$mask,
+               AnyVector:$valueToStore)> {
 
   let summary = "writes elements selectively from a vector as defined by a mask";
 
   let description = [{
-    The compress store operation writes elements from a 1-D vector into memory
-    as defined by a base with indices and a 1-D mask vector. When the mask is
-    set, the corresponding element from the vector is written next to memory.
-    Otherwise, no action is taken for the element. Informally the semantics are:
+    The compress store operation writes elements from a vector into memory as
+    defined by a base with indices and a mask vector. Compression only applies
+    to the innermost dimension. When the mask is set, the corresponding element
+    from the vector is written next to memory.  Otherwise, no action is taken
+    for the element. Informally the semantics are:
+
     ```
     index = i
     if (mask[0]) base[index++] = value[0]
     if (mask[1]) base[index++] = value[1]
     etc.
     ```
+
     Note that the index increment is done conditionally.
 
     If a mask bit is set and the corresponding index is out-of-bounds for the
@@ -3084,7 +3090,6 @@ def Vector_WarpExecuteOnLane0Op : Vector_Op<"warp_execute_on_lane_0",
 
   let skipDefaultBuilders = 1;
   let builders = [
-    OpBuilder<(ins "Value":$laneid, "int64_t":$warpSize)>,
     OpBuilder<(ins "TypeRange":$resultTypes, "Value":$laneid,
                    "int64_t":$warpSize)>,
     // `blockArgTypes` are different than `args` types as they are they
diff --git a/mlir/include/mlir/Support/IndentedOstream.h b/mlir/include/mlir/Support/IndentedOstream.h
index 101aa8b631d299d..eeab36806c4ee70 100644
--- a/mlir/include/mlir/Support/IndentedOstream.h
+++ b/mlir/include/mlir/Support/IndentedOstream.h
@@ -166,8 +166,7 @@ inline void mlir::raw_indented_ostream::write_impl(const char *ptr,
       break;
     }
 
-    auto split =
-        std::make_pair(str.slice(0, idx), str.slice(idx + 1, StringRef::npos));
+    auto split = std::make_pair(str.substr(0, idx), str.substr(idx + 1));
     // Print empty new line without spaces if line only has spaces and no extra
     // prefix is requested.
     if (!split.first.ltrim().empty() || !currentExtraPrefix.empty())
diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h
index 5ff36160dd61620..5e5957170e646c3 100644
--- a/mlir/include/mlir/Transforms/DialectConversion.h
+++ b/mlir/include/mlir/Transforms/DialectConversion.h
@@ -173,7 +173,9 @@ class TypeConverter {
   /// conversion has finished.
   ///
   /// Note: Target materializations may optionally accept an additional Type
-  /// parameter, which is the original type of the SSA value.
+  /// parameter, which is the original type of the SSA value. Furthermore, `T`
+  /// can be a TypeRange; in that case, the function must return a
+  /// SmallVector<Value>.
 
   /// This method registers a materialization that will be called when
   /// converting (potentially multiple) block arguments that were the result of
@@ -210,6 +212,9 @@ class TypeConverter {
   /// will be invoked with: outputType = "t3", inputs = "v2",
   // originalType = "t1". Note  that the original type "t1" cannot be recovered
   /// from just "t3" and "v2"; that's why the originalType parameter exists.
+  ///
+  /// Note: During a 1:N conversion, the result types can be a TypeRange. In
+  /// that case the materialization produces a SmallVector<Value>.
   template <typename FnT, typename T = typename llvm::function_traits<
                               std::decay_t<FnT>>::template arg_t<1>>
   void addTargetMaterialization(FnT &&callback) {
@@ -316,6 +321,11 @@ class TypeConverter {
   Value materializeTargetConversion(OpBuilder &builder, Location loc,
                                     Type resultType, ValueRange inputs,
                                     Type originalType = {}) const;
+  SmallVector<Value> materializeTargetConversion(OpBuilder &builder,
+                                                 Location loc,
+                                                 TypeRange resultType,
+                                                 ValueRange inputs,
+                                                 Type originalType = {}) const;
 
   /// Convert an attribute present `attr` from within the type `type` using
   /// the registered conversion functions. If no applicable conversion has been
@@ -340,9 +350,9 @@ class TypeConverter {
 
   /// The signature of the callback used to materialize a target conversion.
   ///
-  /// Arguments: builder, result type, inputs, location, original type
-  using TargetMaterializationCallbackFn =
-      std::function<Value(OpBuilder &, Type, ValueRange, Location, Type)>;
+  /// Arguments: builder, result types, inputs, location, original type
+  using TargetMaterializationCallbackFn = std::function<SmallVector<Value>(
+      OpBuilder &, TypeRange, ValueRange, Location, Type)>;
 
   /// The signature of the callback used to convert a type attribute.
   using TypeAttributeConversionCallbackFn =
@@ -409,22 +419,46 @@ class TypeConverter {
   /// callback.
   ///
   /// With callback of form:
-  /// `Value(OpBuilder &, T, ValueRange, Location, Type)`
+  /// - Value(OpBuilder &, T, ValueRange, Location, Type)
+  /// - SmallVector<Value>(OpBuilder &, TypeRange, ValueRange, Location, Type)
   template <typename T, typename FnT>
   std::enable_if_t<
       std::is_invocable_v<FnT, OpBuilder &, T, ValueRange, Location, Type>,
       TargetMaterializationCallbackFn>
   wrapTargetMaterialization(FnT &&callback) const {
     return [callback = std::forward<FnT>(callback)](
-               OpBuilder &builder, Type resultType, ValueRange inputs,
-               Location loc, Type originalType) -> Value {
-      if (T derivedType = dyn_cast<T>(resultType))
-        return callback(builder, derivedType, inputs, loc, originalType);
-      return Value();
+               OpBuilder &builder, TypeRange resultTypes, ValueRange inputs,
+               Location loc, Type originalType) -> SmallVector<Value> {
+      SmallVector<Value> result;
+      if constexpr (std::is_same<T, TypeRange>::value) {
+        // This is a 1:N target materialization. Return the produced values
+        // directly.
+        result = callback(builder, resultTypes, inputs, loc, originalType);
+      } else if constexpr (std::is_assignable<Type, T>::value) {
+        // This is a 1:1 target materialization. Invoke the callback only if a
+        // single SSA value is requested.
+        if (resultTypes.size() == 1) {
+          // Invoke the callback only if the type class of the callback matches
+          // the requested result type.
+          if (T derivedType = dyn_cast<T>(resultTypes.front())) {
+            // 1:1 materializations produce single values, but we store 1:N
+            // target materialization functions in the type converter. Wrap the
+            // result value in a SmallVector<Value>.
+            Value val =
+                callback(builder, derivedType, inputs, loc, originalType);
+            if (val)
+              result.push_back(val);
+          }
+        }
+      } else {
+        static_assert(sizeof(T) == 0, "T must be a Type or a TypeRange");
+      }
+      return result;
     };
   }
   /// With callback of form:
-  /// `Value(OpBuilder &, T, ValueRange, Location)`
+  /// - Value(OpBuilder &, T, ValueRange, Location)
+  /// - SmallVector<Value>(OpBuilder &, TypeRange, ValueRange, Location)
   template <typename T, typename FnT>
   std::enable_if_t<
       std::is_invocable_v<FnT, OpBuilder &, T, ValueRange, Location>,
@@ -432,9 +466,9 @@ class TypeConverter {
   wrapTargetMaterialization(FnT &&callback) const {
     return wrapTargetMaterialization<T>(
         [callback = std::forward<FnT>(callback)](
-            OpBuilder &builder, T resultType, ValueRange inputs, Location loc,
-            Type originalType) -> Value {
-          return callback(builder, resultType, inputs, loc);
+            OpBuilder &builder, T resultTypes, ValueRange inputs, Location loc,
+            Type originalType) {
+          return callback(builder, resultTypes, inputs, loc);
         });
   }
 
diff --git a/mlir/include/mlir/Transforms/OneToNTypeConversion.h b/mlir/include/mlir/Transforms/OneToNTypeConversion.h
index c59a3a52f028f32..7b4dd65cbff7b2d 100644
--- a/mlir/include/mlir/Transforms/OneToNTypeConversion.h
+++ b/mlir/include/mlir/Transforms/OneToNTypeConversion.h
@@ -33,49 +33,6 @@
 
 namespace mlir {
 
-/// Extends `TypeConverter` with 1:N target materializations. Such
-/// materializations have to provide the "reverse" of 1:N type conversions,
-/// i.e., they need to materialize N values with target types into one value
-/// with a source type (which isn't possible in the base class currently).
-class OneToNTypeConverter : public TypeConverter {
-public:
-  /// Callback that expresses user-provided materialization logic from the given
-  /// value to N values of the given types. This is useful for expressing target
-  /// materializations for 1:N type conversions, which materialize one value in
-  /// a source type as N values in target types.
-  using OneToNMaterializationCallbackFn =
-      std::function<std::optional<SmallVector<Value>>(OpBuilder &, TypeRange,
-                                                      Value, Location)>;
-
-  /// Creates the mapping of the given range of original types to target types
-  /// of the conversion and stores that mapping in the given (signature)
-  /// conversion. This function simply calls
-  /// `TypeConverter::convertSignatureArgs` and exists here with a different
-  /// name to reflect the broader semantic.
-  LogicalResult computeTypeMapping(TypeRange types,
-                                   SignatureConversion &result) const {
-    return convertSignatureArgs(types, result);
-  }
-
-  /// Applies one of the user-provided 1:N target materializations. If several
-  /// exists, they are tried out in the reverse order in which they have been
-  /// added until the first one succeeds. If none succeeds, the functions
-  /// returns `std::nullopt`.
-  std::optional<SmallVector<Value>>
-  materializeTargetConversion(OpBuilder &builder, Location loc,
-                              TypeRange resultTypes, Value input) const;
-
-  /// Adds a 1:N target materialization to the converter. Such materializations
-  /// build IR that converts N values with target types into 1 value of the
-  /// source type.
-  void addTargetMaterialization(OneToNMaterializationCallbackFn &&callback) {
-    oneToNTargetMaterializations.emplace_back(std::move(callback));
-  }
-
-private:
-  SmallVector<OneToNMaterializationCallbackFn> oneToNTargetMaterializations;
-};
-
 /// Stores a 1:N mapping of types and provides several useful accessors. This
 /// class extends `SignatureConversion`, which already supports 1:N type
 /// mappings but lacks some accessors into the mapping as well as access to the
@@ -295,7 +252,7 @@ class OneToNOpConversionPattern : public OneToNConversionPattern {
 /// not fail if some ops or types remain unconverted (i.e., the conversion is
 /// only "partial").
 LogicalResult
-applyPartialOneToNConversion(Operation *op, OneToNTypeConverter &typeConverter,
+applyPartialOneToNConversion(Operation *op, TypeConverter &typeConverter,
                              const FrozenRewritePatternSet &patterns);
 
 /// Add a pattern to the given pattern list to convert the signature of a
diff --git a/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp b/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp
index d3ee89743da9db5..1c592d665f3e4c5 100644
--- a/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp
+++ b/mlir/lib/Conversion/ControlFlowToSCF/ControlFlowToSCF.cpp
@@ -16,7 +16,6 @@
 #include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/SCF/IR/SCF.h"
 #include "mlir/Dialect/UB/IR/UBOps.h"
 #include "mlir/Pass/Pass.h"
diff --git a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp
index 27c43e0daad0728..c046ea1b824fc85 100644
--- a/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp
+++ b/mlir/lib/Conversion/FuncToLLVM/FuncToLLVM.cpp
@@ -273,7 +273,7 @@ static void wrapExternalFunction(OpBuilder &builder, Location loc,
 static void restoreByValRefArgumentType(
     ConversionPatternRewriter &rewriter, const LLVMTypeConverter &typeConverter,
     ArrayRef<std::optional<NamedAttribute>> byValRefNonPtrAttrs,
-    LLVM::LLVMFuncOp funcOp) {
+    ArrayRef<BlockArgument> oldBlockArgs, LLVM::LLVMFuncOp funcOp) {
   // Nothing to do for function declarations.
   if (funcOp.isExternal())
     return;
@@ -281,8 +281,8 @@ static void restoreByValRefArgumentType(
   ConversionPatternRewriter::InsertionGuard guard(rewriter);
   rewriter.setInsertionPointToStart(&funcOp.getFunctionBody().front());
 
-  for (const auto &[arg, byValRefAttr] :
-       llvm::zip(funcOp.getArguments(), byValRefNonPtrAttrs)) {
+  for (const auto &[arg, oldArg, byValRefAttr] :
+       llvm::zip(funcOp.getArguments(), oldBlockArgs, byValRefNonPtrAttrs)) {
     // Skip argument if no `llvm.byval` or `llvm.byref` attribute.
     if (!byValRefAttr)
       continue;
@@ -295,7 +295,7 @@ static void restoreByValRefArgumentType(
         cast<TypeAttr>(byValRefAttr->getValue()).getValue());
 
     auto valueArg = rewriter.create<LLVM::LoadOp>(arg.getLoc(), resTy, arg);
-    rewriter.replaceAllUsesExcept(arg, valueArg, valueArg);
+    rewriter.replaceUsesOfBlockArgument(oldArg, valueArg);
   }
 }
 
@@ -309,6 +309,10 @@ mlir::convertFuncOpToLLVMFuncOp(FunctionOpInterface funcOp,
     return rewriter.notifyMatchFailure(
         funcOp, "Only support FunctionOpInterface with FunctionType");
 
+  // Keep track of the entry block arguments. They will be needed later.
+  SmallVector<BlockArgument> oldBlockArgs =
+      llvm::to_vector(funcOp.getArguments());
+
   // Convert the original function arguments. They are converted using the
   // LLVMTypeConverter provided to this legalization pattern.
   auto varargsAttr = funcOp->getAttrOfType<BoolAttr>(varargsAttrName);
@@ -438,7 +442,7 @@ mlir::convertFuncOpToLLVMFuncOp(FunctionOpInterface funcOp,
   // pointee type in the function body when converting `llvm.byval`/`llvm.byref`
   // function arguments.
   restoreByValRefArgumentType(rewriter, converter, byValRefNonPtrAttrs,
-                              newFuncOp);
+                              oldBlockArgs, newFuncOp);
 
   if (!shouldUseBarePtrCallConv(funcOp, &converter)) {
     if (funcOp->getAttrOfType<UnitAttr>(
diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
index 1cf8a1acb319358..3b94abd88f9ed22 100644
--- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
@@ -61,6 +61,11 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> {
                                   SourceOp>::value,
                   "expected op with same operand and result types");
 
+    if (!op->template getParentOfType<FunctionOpInterface>()) {
+      return rewriter.notifyMatchFailure(
+          op, "expected op to be within a function region");
+    }
+
     SmallVector<Value, 1> castedOperands;
     for (Value operand : adaptor.getOperands())
       castedOperands.push_back(maybeCast(operand, rewriter));
diff --git a/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt b/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt
index cbe85789b29a371..7fc4af540318552 100644
--- a/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt
+++ b/mlir/lib/Conversion/LinalgToStandard/CMakeLists.txt
@@ -15,6 +15,7 @@ add_mlir_conversion_library(MLIRLinalgToStandard
   MLIRIR
   MLIRLinalgDialect
   MLIRLinalgTransforms
+  MLIRLLVMDialect
   MLIRMemRefDialect
   MLIRPass
   MLIRSCFDialect
diff --git a/mlir/lib/Conversion/MathToFuncs/MathToFuncs.cpp b/mlir/lib/Conversion/MathToFuncs/MathToFuncs.cpp
index 3a567643ffdb8fc..df5396ac628cf67 100644
--- a/mlir/lib/Conversion/MathToFuncs/MathToFuncs.cpp
+++ b/mlir/lib/Conversion/MathToFuncs/MathToFuncs.cpp
@@ -781,6 +781,9 @@ struct ConvertMathToFuncsPass
   // or equal to minWidthOfFPowIExponent option value.
   bool isFPowIConvertible(math::FPowIOp op);
 
+  // Returns true if the element type of the op's first result is an integer.
+  bool isConvertible(Operation *op);
+
   // Generate outlined implementations for power operations
   // and store them in funcImpls map.
   void generateOpImplementations();
@@ -798,13 +801,17 @@ bool ConvertMathToFuncsPass::isFPowIConvertible(math::FPowIOp op) {
   return (expTy && expTy.getWidth() >= minWidthOfFPowIExponent);
 }
 
+bool ConvertMathToFuncsPass::isConvertible(Operation *op) {
+  return isa<IntegerType>(getElementTypeOrSelf(op->getResult(0).getType()));
+}
+
 void ConvertMathToFuncsPass::generateOpImplementations() {
   ModuleOp module = getOperation();
 
   module.walk([&](Operation *op) {
     TypeSwitch<Operation *>(op)
         .Case<math::CountLeadingZerosOp>([&](math::CountLeadingZerosOp op) {
-          if (!convertCtlz)
+          if (!convertCtlz || !isConvertible(op))
             return;
           Type resultType = getElementTypeOrSelf(op.getResult().getType());
 
@@ -816,6 +823,9 @@ void ConvertMathToFuncsPass::generateOpImplementations() {
             entry.first->second = createCtlzFunc(&module, resultType);
         })
         .Case<math::IPowIOp>([&](math::IPowIOp op) {
+          if (!isConvertible(op))
+            return;
+
           Type resultType = getElementTypeOrSelf(op.getResult().getType());
 
           // Generate the software implementation of this operation,
@@ -873,9 +883,12 @@ void ConvertMathToFuncsPass::runOnOperation() {
                          func::FuncDialect, scf::SCFDialect,
                          vector::VectorDialect>();
 
-  target.addIllegalOp<math::IPowIOp>();
-  if (convertCtlz)
-    target.addIllegalOp<math::CountLeadingZerosOp>();
+  target.addDynamicallyLegalOp<math::IPowIOp>(
+      [this](math::IPowIOp op) { return !isConvertible(op); });
+  if (convertCtlz) {
+    target.addDynamicallyLegalOp<math::CountLeadingZerosOp>(
+        [this](math::CountLeadingZerosOp op) { return !isConvertible(op); });
+  }
   target.addDynamicallyLegalOp<math::FPowIOp>(
       [this](math::FPowIOp op) { return !isFPowIConvertible(op); });
   if (failed(applyPartialConversion(module, target, std::move(patterns))))
diff --git a/mlir/lib/Conversion/MathToLibm/CMakeLists.txt b/mlir/lib/Conversion/MathToLibm/CMakeLists.txt
index 61c46e9bfe250e5..0a4eb97474f3ab9 100644
--- a/mlir/lib/Conversion/MathToLibm/CMakeLists.txt
+++ b/mlir/lib/Conversion/MathToLibm/CMakeLists.txt
@@ -14,6 +14,7 @@ add_mlir_conversion_library(MLIRMathToLibm
   MLIRArithDialect
   MLIRDialectUtils
   MLIRFuncDialect
+  MLIRLLVMDialect
   MLIRMathDialect
   MLIRPass
   MLIRTransformUtils
diff --git a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
index f28473a108e1b54..87c0936cee229ec 100644
--- a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
+++ b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
@@ -1148,6 +1148,12 @@ class LoopPattern : public SPIRVToLLVMConversion<spirv::LoopOp> {
     if (loopOp.getLoopControl() != spirv::LoopControl::None)
       return failure();
 
+    // `spirv.mlir.loop` with empty region is redundant and should be erased.
+    if (loopOp.getBody().empty()) {
+      rewriter.eraseOp(loopOp);
+      return success();
+    }
+
     Location loc = loopOp.getLoc();
 
     // Split the current block after `spirv.mlir.loop`. The remaining ops will
diff --git a/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp b/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp
index 7cfcc4180539c2b..6de151594e3e9c6 100644
--- a/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.cpp
@@ -107,9 +107,10 @@ struct SelectOpInterface
     // If trueValue <= falseValue:
     // * result <= falseValue
     // * result >= trueValue
-    if (cstr.compare(/*lhs=*/{trueValue, dim},
-                     ValueBoundsConstraintSet::ComparisonOperator::LE,
-                     /*rhs=*/{falseValue, dim})) {
+    if (cstr.populateAndCompare(
+            /*lhs=*/{trueValue, dim},
+            ValueBoundsConstraintSet::ComparisonOperator::LE,
+            /*rhs=*/{falseValue, dim})) {
       if (dim) {
         cstr.bound(value)[*dim] >= cstr.getExpr(trueValue, dim);
         cstr.bound(value)[*dim] <= cstr.getExpr(falseValue, dim);
@@ -121,9 +122,10 @@ struct SelectOpInterface
     // If falseValue <= trueValue:
     // * result <= trueValue
     // * result >= falseValue
-    if (cstr.compare(/*lhs=*/{falseValue, dim},
-                     ValueBoundsConstraintSet::ComparisonOperator::LE,
-                     /*rhs=*/{trueValue, dim})) {
+    if (cstr.populateAndCompare(
+            /*lhs=*/{falseValue, dim},
+            ValueBoundsConstraintSet::ComparisonOperator::LE,
+            /*rhs=*/{trueValue, dim})) {
       if (dim) {
         cstr.bound(value)[*dim] >= cstr.getExpr(falseValue, dim);
         cstr.bound(value)[*dim] <= cstr.getExpr(trueValue, dim);
diff --git a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp
index 4968c4fc463d04b..e908a536e6fb271 100644
--- a/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp
+++ b/mlir/lib/Dialect/ArmSME/Transforms/VectorLegalization.cpp
@@ -921,7 +921,7 @@ struct VectorLegalizationPass
     : public arm_sme::impl::VectorLegalizationBase<VectorLegalizationPass> {
   void runOnOperation() override {
     auto *context = &getContext();
-    OneToNTypeConverter converter;
+    TypeConverter converter;
     RewritePatternSet patterns(context);
     converter.addConversion([](Type type) { return type; });
     converter.addConversion(
diff --git a/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp b/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp
index 2b2d10a7733eceb..004d73a77e53599 100644
--- a/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/DecomposeMemRefs.cpp
@@ -29,6 +29,17 @@ namespace mlir {
 
 using namespace mlir;
 
+static MemRefType inferCastResultType(Value source, OpFoldResult offset) {
+  auto sourceType = cast<BaseMemRefType>(source.getType());
+  SmallVector<int64_t> staticOffsets;
+  SmallVector<Value> dynamicOffsets;
+  dispatchIndexOpFoldResults(offset, dynamicOffsets, staticOffsets);
+  auto stridedLayout =
+      StridedLayoutAttr::get(source.getContext(), staticOffsets.front(), {});
+  return MemRefType::get({}, sourceType.getElementType(), stridedLayout,
+                         sourceType.getMemorySpace());
+}
+
 static void setInsertionPointToStart(OpBuilder &builder, Value val) {
   if (auto *parentOp = val.getDefiningOp()) {
     builder.setInsertionPointAfter(parentOp);
@@ -98,7 +109,7 @@ static Value getFlatMemref(OpBuilder &rewriter, Location loc, Value source,
   SmallVector<OpFoldResult> offsetsTemp = getAsOpFoldResult(offsets);
   auto &&[base, offset, ignore] =
       getFlatOffsetAndStrides(rewriter, loc, source, offsetsTemp);
-  auto retType = cast<MemRefType>(base.getType());
+  MemRefType retType = inferCastResultType(base, offset);
   return rewriter.create<memref::ReinterpretCastOp>(loc, retType, base, offset,
                                                     std::nullopt, std::nullopt);
 }
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index cc73878a64ff67e..c9bc9533ca2a6bd 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -3438,7 +3438,44 @@ void InlineAsmOp::getEffects(
 void LLVM::AssumeOp::build(OpBuilder &builder, OperationState &state,
                            mlir::Value cond) {
   return build(builder, state, cond, /*op_bundle_operands=*/{},
-               /*op_bundle_tags=*/{});
+               /*op_bundle_tags=*/ArrayAttr{});
+}
+
+void LLVM::AssumeOp::build(OpBuilder &builder, OperationState &state,
+                           Value cond,
+                           ArrayRef<llvm::OperandBundleDefT<Value>> opBundles) {
+  SmallVector<ValueRange> opBundleOperands;
+  SmallVector<Attribute> opBundleTags;
+  opBundleOperands.reserve(opBundles.size());
+  opBundleTags.reserve(opBundles.size());
+
+  for (const llvm::OperandBundleDefT<Value> &bundle : opBundles) {
+    opBundleOperands.emplace_back(bundle.inputs());
+    opBundleTags.push_back(
+        StringAttr::get(builder.getContext(), bundle.getTag()));
+  }
+
+  auto opBundleTagsAttr = ArrayAttr::get(builder.getContext(), opBundleTags);
+  return build(builder, state, cond, opBundleOperands, opBundleTagsAttr);
+}
+
+void LLVM::AssumeOp::build(OpBuilder &builder, OperationState &state,
+                           Value cond, llvm::StringRef tag, ValueRange args) {
+  llvm::OperandBundleDefT<Value> opBundle(
+      tag.str(), SmallVector<Value>(args.begin(), args.end()));
+  return build(builder, state, cond, opBundle);
+}
+
+void LLVM::AssumeOp::build(OpBuilder &builder, OperationState &state,
+                           Value cond, AssumeAlignTag, Value ptr, Value align) {
+  return build(builder, state, cond, "align", ValueRange{ptr, align});
+}
+
+void LLVM::AssumeOp::build(OpBuilder &builder, OperationState &state,
+                           Value cond, AssumeSeparateStorageTag, Value ptr1,
+                           Value ptr2) {
+  return build(builder, state, cond, "separate_storage",
+               ValueRange{ptr1, ptr2});
 }
 
 LogicalResult LLVM::AssumeOp::verify() { return verifyOperandBundles(*this); }
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 1f1d8ad89ae2b9b..9c0ab4f41b855a2 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -253,6 +253,12 @@ void transform::ApplyFoldAddIntoDestPatternsOp::populatePatterns(
   linalg::populateFoldAddIntoDestPatterns(patterns);
 }
 
+void transform::ApplyPadVectorizationPatternsOp::populatePatterns(
+    RewritePatternSet &patterns) {
+  linalg::populatePadOpVectorizationPatterns(patterns);
+  linalg::populateInsertSliceVectorizationPatterns(patterns);
+}
+
 //===----------------------------------------------------------------------===//
 // BufferizeToAllocationOp
 //===----------------------------------------------------------------------===//
@@ -3477,6 +3483,9 @@ transform::VectorizeChildrenAndApplyPatternsOp::applyToOne(
 
   patterns.add<CopyVectorizationPattern>(ctx);
 
+  // Add misc. vectorization patterns (e.g. for tensor.insert_slice)
+  linalg::populateInsertSliceVectorizationPatterns(patterns);
+
   if (getVectorizePadding())
     linalg::populatePadOpVectorizationPatterns(patterns);
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 0fe096863d7b016..da5233049aaf69e 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -973,12 +973,7 @@ GeneralizePadOpPattern::matchAndRewrite(tensor::PadOp padOp,
       padOp.getLoc(), staticSizes, resultType.getElementType(), dynSizes);
   Value fill = createFillOrGenerateOp(rewriter, padOp, emptyTensor, dynSizes);
 
-  // Try optimize the copy of source.
-  if (optimizeCopyFn && optimizeCopyFn(rewriter, padOp, fill).succeeded())
-    return success();
-
-  // tensor::PadOps cannot be optimized. Generate a InsertSliceOp instead
-  // for copying the PadOp source.
+  // Generate an InsertSliceOp for copying the PadOp source.
   auto sourceType = padOp.getSourceType();
   // Compute size of source of tensor::PadOp.
   SmallVector<OpFoldResult> srcSizes =
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index e1b97fbf985df81..090e0b46768d7e9 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -2281,115 +2281,6 @@ LogicalResult mlir::linalg::vectorizeCopy(RewriterBase &rewriter,
 //----------------------------------------------------------------------------//
 // Misc. vectorization patterns.
 //----------------------------------------------------------------------------//
-
-/// Helper function that retrieves the value of an IntegerAttr.
-static int64_t getIntFromAttr(Attribute attr) {
-  return cast<IntegerAttr>(attr).getInt();
-}
-
-/// Given an ArrayRef of OpFoldResults, return a vector of Values.
-/// IntegerAttrs are converted to ConstantIndexOps. Other attribute types are
-/// not supported.
-static SmallVector<Value> ofrToIndexValues(RewriterBase &rewriter, Location loc,
-                                           ArrayRef<OpFoldResult> ofrs) {
-  SmallVector<Value> result;
-  for (auto o : ofrs) {
-    if (auto val = llvm::dyn_cast_if_present<Value>(o)) {
-      result.push_back(val);
-    } else {
-      result.push_back(rewriter.create<arith::ConstantIndexOp>(
-          loc, getIntFromAttr(o.template get<Attribute>())));
-    }
-  }
-  return result;
-}
-
-/// Rewrite a tensor::PadOp into a sequence of EmptyOp, FillOp and
-/// InsertSliceOp. For now, only constant padding values are supported.
-/// If there is enough static type information, TransferReadOps and
-/// TransferWriteOps may be generated instead of InsertSliceOps.
-struct GenericPadOpVectorizationPattern : public GeneralizePadOpPattern {
-  GenericPadOpVectorizationPattern(MLIRContext *context,
-                                   PatternBenefit benefit = 1)
-      : GeneralizePadOpPattern(context, tryVectorizeCopy, benefit) {}
-  /// Vectorize the copying of a tensor::PadOp's source. This is possible if
-  /// each dimension size is statically know in the source type or the result
-  /// type (or both).
-  static LogicalResult tryVectorizeCopy(RewriterBase &rewriter,
-                                        tensor::PadOp padOp, Value dest) {
-    auto sourceType = padOp.getSourceType();
-    auto resultType = padOp.getResultType();
-    if (!VectorType::isValidElementType(sourceType.getElementType()))
-      return failure();
-
-    // Copy cannot be vectorized if pad value is non-constant and source shape
-    // is dynamic. In case of a dynamic source shape, padding must be appended
-    // by TransferReadOp, but TransferReadOp supports only constant padding.
-    auto padValue = padOp.getConstantPaddingValue();
-    if (!padValue) {
-      if (!sourceType.hasStaticShape())
-        return failure();
-      // Create dummy padding value.
-      auto elemType = sourceType.getElementType();
-      padValue = rewriter.create<arith::ConstantOp>(
-          padOp.getLoc(), elemType, rewriter.getZeroAttr(elemType));
-    }
-
-    SmallVector<int64_t> vecShape;
-    SmallVector<bool> readInBounds;
-    SmallVector<bool> writeInBounds;
-    for (unsigned i = 0; i < sourceType.getRank(); ++i) {
-      if (!sourceType.isDynamicDim(i)) {
-        vecShape.push_back(sourceType.getDimSize(i));
-        // Source shape is statically known: Neither read nor write are
-        // out-of- bounds.
-        readInBounds.push_back(true);
-        writeInBounds.push_back(true);
-      } else if (!resultType.isDynamicDim(i)) {
-        // Source shape is not statically known, but result shape is.
-        // Vectorize with size of result shape. This may be larger than the
-        // source size.
-        vecShape.push_back(resultType.getDimSize(i));
-        // Read may be out-of-bounds because the result size could be larger
-        // than the source size.
-        readInBounds.push_back(false);
-        // Write is out-of-bounds if low padding > 0.
-        writeInBounds.push_back(
-            getConstantIntValue(padOp.getMixedLowPad()[i]) ==
-            static_cast<int64_t>(0));
-      } else {
-        // Neither source nor result dim of padOp is static. Cannot vectorize
-        // the copy.
-        return failure();
-      }
-    }
-    auto vecType = VectorType::get(vecShape, sourceType.getElementType());
-
-    // Generate TransferReadOp.
-    SmallVector<Value> readIndices(
-        vecType.getRank(),
-        rewriter.create<arith::ConstantIndexOp>(padOp.getLoc(), 0));
-    auto read = rewriter.create<vector::TransferReadOp>(
-        padOp.getLoc(), vecType, padOp.getSource(), readIndices, padValue,
-        ArrayRef<bool>{readInBounds});
-
-    // If `dest` is a FillOp and the TransferWriteOp would overwrite the
-    // entire tensor, write directly to the FillOp's operand.
-    if (llvm::equal(vecShape, resultType.getShape()) &&
-        llvm::all_of(writeInBounds, [](bool b) { return b; }))
-      if (auto fill = dest.getDefiningOp<FillOp>())
-        dest = fill.output();
-
-    // Generate TransferWriteOp.
-    auto writeIndices =
-        ofrToIndexValues(rewriter, padOp.getLoc(), padOp.getMixedLowPad());
-    rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
-        padOp, read, dest, writeIndices, ArrayRef<bool>{writeInBounds});
-
-    return success();
-  }
-};
-
 /// Base pattern for rewriting tensor::PadOps whose result is consumed by a
 /// given operation type OpTy.
 template <typename OpTy>
@@ -2623,6 +2514,163 @@ struct PadOpVectorizationWithTransferWritePattern
   }
 };
 
+/// Returns the effective Pad value for the input op, provided it's a scalar.
+///
+/// Many Ops exhibit pad-like behaviour, but this isn't always explicit. If
+/// this Op performs padding, retrieve the padding value provided that it's
+/// a scalar and static/fixed for all the padded values. Returns an empty value
+/// otherwise.
+static Value getStaticPadVal(Operation *op) {
+  if (!op)
+    return {};
+
+  // 1. vector.broadcast (f32 -> vector <...xf32>) - return the value that's
+  // being broadcast, provided that it's a scalar.
+  if (auto bcast = llvm::dyn_cast<vector::BroadcastOp>(op)) {
+    auto source = bcast.getSource();
+    if (llvm::dyn_cast<VectorType>(source.getType()))
+      return {};
+
+    return source;
+  }
+
+  // 2. linalg.fill - use the scalar input value that is used to fill the output
+  // tensor.
+  if (auto fill = llvm::dyn_cast<linalg::FillOp>(op)) {
+    return fill.getInputs()[0];
+  }
+
+  // 3. tensor.generateOp - can't guarantee the value is fixed without
+  // analysing, bail out.
+  if (auto generate = llvm::dyn_cast<tensor::GenerateOp>(op)) {
+    return {};
+  }
+
+  // 4. vector.transfer_write - inspect the input vector that's written from. If
+  // it contains a single value that has been broadcast (e.g. via
+  // vector.broadcast), extract it, fail otherwise.
+  if (auto xferWrite = llvm::dyn_cast<vector::TransferWriteOp>(op))
+    return getStaticPadVal(xferWrite.getVector().getDefiningOp());
+
+  // 5. tensor.insert_slice - inspect the destination tensor. If it's larger
+  // than the input tensor, then, provided it's constant, we'll extract the
+  // value that was used to generate it (via e.g. linalg.fill), fail otherwise.
+  // TODO: Clarify the semantics when the input tensor is larger than the
+  // destination.
+  if (auto slice = llvm::dyn_cast<tensor::InsertSliceOp>(op))
+    return getStaticPadVal(slice.getDest().getDefiningOp());
+
+  return {};
+}
+
+/// Rewrite tensor.insert_slice as a vector.transfer_read +
+/// vector.transfer_write pair. The vector size is inferred from the static
+/// dims in the input and output tensors. If a dim is dynamic in both the input
+/// and output tensors, bails out.
+///
+/// Before:
+///     !t_in_type = tensor<1x2x3xf32>
+///     !t_out_type = tensor<9x8x7x1x2x3xf32>
+///     !v_type = vector<1x2x3xf32>
+///     %inserted_slice = tensor.insert_slice %src into %dest ... : !t_in_type
+///     into !t_out_type
+/// After:
+///     %read = vector.transfer_read %src[...], %pad ... : !t_in_type, !v_type
+///     %write = vector.transfer_write %read, %dest ... : !v_type, !t_out_type
+///
+/// TODO: Support masking
+struct InsertSliceVectorizePattern
+    : public OpRewritePattern<tensor::InsertSliceOp> {
+  using OpRewritePattern<tensor::InsertSliceOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(tensor::InsertSliceOp sliceOp,
+                                PatternRewriter &rewriter) const final {
+    auto sourceType = sliceOp.getSource().getType();
+    if (!VectorType::isValidElementType(sourceType.getElementType()))
+      return failure();
+
+    auto resultType = sliceOp.getResultType();
+
+    // 1. Get the pad value.
+    // TransferReadOp requires a scalar padding value. Note that:
+    //    * for in-bounds access, the value is actually irrelevant.
+    //  There are 2 cases in which xfer.read accesses are known to be in-bounds:
+    //  1. The source shape is static (output vector sizes would be based on
+    //     the source shape and hence all memory accesses would be in-bounds),
+    //  2. Masking is used (output vector sizes would be user-provided, in which
+    //     case it is assumed that all memory accesses are in-bounds). This
+    //     remains a TODO.
+    //
+    // When the value is not known and not needed, use 0. Otherwise, bail out.
+    Value padValue = getStaticPadVal(sliceOp);
+    bool isOutOfBoundsRead = !sourceType.hasStaticShape();
+
+    if (!padValue && isOutOfBoundsRead) {
+      LDBG("Failed to get a pad value for out-of-bounds read access\n");
+      return failure();
+    }
+
+    if (!padValue) {
+      auto elemType = sourceType.getElementType();
+      padValue = rewriter.create<arith::ConstantOp>(
+          sliceOp.getLoc(), elemType, rewriter.getZeroAttr(elemType));
+    }
+
+    // 2. Get the vector shape and in-bounds attributes
+    SmallVector<int64_t> vecShape;
+    SmallVector<bool> readInBounds;
+    SmallVector<bool> writeInBounds;
+    size_t rankDiff = resultType.getRank() - sourceType.getRank();
+    for (unsigned i = 0; i < sourceType.getRank(); ++i) {
+      if (!sourceType.isDynamicDim(i)) {
+        vecShape.push_back(sourceType.getDimSize(i));
+        // Source shape is statically known: Neither read nor write are
+        // out-of-bounds.
+        readInBounds.push_back(true);
+        writeInBounds.push_back(true);
+      } else if (!resultType.isDynamicDim(i)) {
+        // Source shape is not statically known, but result shape is.
+        // Vectorize with size of result shape. This may be larger than the
+        // source size.
+        // FIXME: Using rankDiff implies that the source tensor is inserted at
+        // the end of the destination tensor. However, that's not required.
+        vecShape.push_back(resultType.getDimSize(rankDiff + i));
+        // Read may be out-of-bounds because the result size could be larger
+        // than the source size.
+        readInBounds.push_back(false);
+        // Write is in-bounds provided that the corresponding write idx is 0.
+        // To keep this logic simple, conservatively mark as out-of-bounds.
+        writeInBounds.push_back(false);
+      } else {
+        // Neither source nor result dim of sliceOp is static. Cannot vectorize
+        // the copy.
+        // TODO: Add support for masking
+        return failure();
+      }
+    }
+    auto vecType = VectorType::get(vecShape, sourceType.getElementType());
+
+    // 3. Generate TransferReadOp.
+    SmallVector<Value> readIndices(
+        vecType.getRank(),
+        rewriter.create<arith::ConstantIndexOp>(sliceOp.getLoc(), 0));
+    auto read = rewriter.create<vector::TransferReadOp>(
+        sliceOp.getLoc(), vecType, sliceOp.getSource(), readIndices, padValue,
+        ArrayRef<bool>{readInBounds});
+
+    // 4. Generate TransferWriteOp.
+    auto writeIndices = getValueOrCreateConstantIndexOp(
+        rewriter, sliceOp.getLoc(), sliceOp.getMixedOffsets());
+
+    // 5. Finalize
+    rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
+        sliceOp, read, sliceOp.getDest(), writeIndices,
+        ArrayRef<bool>{writeInBounds});
+
+    return success();
+  }
+};
+
 /// Rewrite use of tensor::PadOp result in InsertSliceOp. E.g.:
 /// ```
 /// %0 = tensor.pad %src ... : tensor<?x?xf32> to tensor<17x5xf32>
@@ -2699,8 +2747,8 @@ struct PadOpVectorizationWithInsertSlicePattern
     // Generate TransferWriteOp: Write to InsertSliceOp's dest tensor at
     // specified offsets. Write is fully in-bounds because a InsertSliceOp's
     // source must fit into the destination at the specified offsets.
-    auto writeIndices =
-        ofrToIndexValues(rewriter, padOp.getLoc(), insertOp.getMixedOffsets());
+    auto writeIndices = getValueOrCreateConstantIndexOp(
+        rewriter, padOp.getLoc(), insertOp.getMixedOffsets());
     SmallVector<bool> inBounds(vecRank, true);
     rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
         insertOp, read, insertOp.getDest(), writeIndices,
@@ -2710,10 +2758,18 @@ struct PadOpVectorizationWithInsertSlicePattern
   }
 };
 
+void mlir::linalg::populateInsertSliceVectorizationPatterns(
+    RewritePatternSet &patterns) {
+  patterns.add<InsertSliceVectorizePattern>(patterns.getContext());
+}
+
 void mlir::linalg::populatePadOpVectorizationPatterns(
     RewritePatternSet &patterns, PatternBenefit baseBenefit) {
-  patterns.add<GenericPadOpVectorizationPattern>(patterns.getContext(),
-                                                 baseBenefit);
+  // TODO: The following pattern implements "decomposition" and
+  // optional "vectorization". Separate "decomposition" into a separate
+  // pre-processing pattern group.
+  patterns.add<GeneralizePadOpPattern>(patterns.getContext(), baseBenefit);
+
   // Try these specialized patterns first before resorting to the generic one.
   patterns.add<PadOpVectorizationWithTransferReadPattern,
                PadOpVectorizationWithTransferWritePattern,
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index d579a27359dfa01..2219505c9b802f6 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -1892,11 +1892,12 @@ LogicalResult ReinterpretCastOp::verify() {
   // Match sizes in result memref type and in static_sizes attribute.
   for (auto [idx, resultSize, expectedSize] :
        llvm::enumerate(resultType.getShape(), getStaticSizes())) {
-    if (!ShapedType::isDynamic(resultSize) &&
-        !ShapedType::isDynamic(expectedSize) && resultSize != expectedSize)
+    if (!ShapedType::isDynamic(resultSize) && resultSize != expectedSize)
       return emitError("expected result type with size = ")
-             << expectedSize << " instead of " << resultSize
-             << " in dim = " << idx;
+             << (ShapedType::isDynamic(expectedSize)
+                     ? std::string("dynamic")
+                     : std::to_string(expectedSize))
+             << " instead of " << resultSize << " in dim = " << idx;
   }
 
   // Match offset and strides in static_offset and static_strides attributes. If
@@ -1910,20 +1911,22 @@ LogicalResult ReinterpretCastOp::verify() {
 
   // Match offset in result memref type and in static_offsets attribute.
   int64_t expectedOffset = getStaticOffsets().front();
-  if (!ShapedType::isDynamic(resultOffset) &&
-      !ShapedType::isDynamic(expectedOffset) && resultOffset != expectedOffset)
+  if (!ShapedType::isDynamic(resultOffset) && resultOffset != expectedOffset)
     return emitError("expected result type with offset = ")
-           << expectedOffset << " instead of " << resultOffset;
+           << (ShapedType::isDynamic(expectedOffset)
+                   ? std::string("dynamic")
+                   : std::to_string(expectedOffset))
+           << " instead of " << resultOffset;
 
   // Match strides in result memref type and in static_strides attribute.
   for (auto [idx, resultStride, expectedStride] :
        llvm::enumerate(resultStrides, getStaticStrides())) {
-    if (!ShapedType::isDynamic(resultStride) &&
-        !ShapedType::isDynamic(expectedStride) &&
-        resultStride != expectedStride)
+    if (!ShapedType::isDynamic(resultStride) && resultStride != expectedStride)
       return emitError("expected result type with stride = ")
-             << expectedStride << " instead of " << resultStride
-             << " in dim = " << idx;
+             << (ShapedType::isDynamic(expectedStride)
+                     ? std::string("dynamic")
+                     : std::to_string(expectedStride))
+             << " instead of " << resultStride << " in dim = " << idx;
   }
 
   return success();
diff --git a/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp
index faba12f5bf82f89..83683c7e617bf80 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp
@@ -89,7 +89,8 @@ struct MemRefReshapeOpConverter : public OpRewritePattern<memref::ReshapeOp> {
     strides.resize(rank);
 
     Location loc = op.getLoc();
-    Value stride = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    Value stride = nullptr;
+    int64_t staticStride = 1;
     for (int i = rank - 1; i >= 0; --i) {
       Value size;
       // Load dynamic sizes from the shape input, use constants for static dims.
@@ -105,9 +106,22 @@ struct MemRefReshapeOpConverter : public OpRewritePattern<memref::ReshapeOp> {
         size = rewriter.create<arith::ConstantOp>(loc, sizeAttr);
         sizes[i] = sizeAttr;
       }
-      strides[i] = stride;
-      if (i > 0)
-        stride = rewriter.create<arith::MulIOp>(loc, stride, size);
+      if (stride)
+        strides[i] = stride;
+      else
+        strides[i] = rewriter.getIndexAttr(staticStride);
+
+      if (i > 0) {
+        if (stride) {
+          stride = rewriter.create<arith::MulIOp>(loc, stride, size);
+        } else if (op.getType().isDynamicDim(i)) {
+          stride = rewriter.create<arith::MulIOp>(
+              loc, rewriter.create<arith::ConstantIndexOp>(loc, staticStride),
+              size);
+        } else {
+          staticStride *= op.getType().getDimSize(i);
+        }
+      }
     }
     rewriter.replaceOpWithNewOp<memref::ReinterpretCastOp>(
         op, op.getType(), op.getSource(), /*offset=*/rewriter.getIndexAttr(0),
diff --git a/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp b/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp
index a2049ba4a4924d2..087d1fcc2b23ae4 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp
@@ -507,6 +507,8 @@ getCollapsedStride(memref::CollapseShapeOp collapseShape, OpBuilder &builder,
 
   SmallVector<OpFoldResult> groupStrides;
   ArrayRef<int64_t> srcShape = sourceType.getShape();
+
+  OpFoldResult lastValidStride = nullptr;
   for (int64_t currentDim : reassocGroup) {
     // Skip size-of-1 dimensions, since right now their strides may be
     // meaningless.
@@ -517,11 +519,11 @@ getCollapsedStride(memref::CollapseShapeOp collapseShape, OpBuilder &builder,
       continue;
 
     int64_t currentStride = strides[currentDim];
-    groupStrides.push_back(ShapedType::isDynamic(currentStride)
-                               ? origStrides[currentDim]
-                               : builder.getIndexAttr(currentStride));
+    lastValidStride = ShapedType::isDynamic(currentStride)
+                          ? origStrides[currentDim]
+                          : builder.getIndexAttr(currentStride);
   }
-  if (groupStrides.empty()) {
+  if (!lastValidStride) {
     // We're dealing with a 1x1x...x1 shape. The stride is meaningless,
     // but we still have to make the type system happy.
     MemRefType collapsedType = collapseShape.getResultType();
@@ -543,12 +545,7 @@ getCollapsedStride(memref::CollapseShapeOp collapseShape, OpBuilder &builder,
     return {builder.getIndexAttr(finalStride)};
   }
 
-  // For the general case, we just want the minimum stride
-  // since the collapsed dimensions are contiguous.
-  auto minMap = AffineMap::getMultiDimIdentityMap(groupStrides.size(),
-                                                  builder.getContext());
-  return {makeComposedFoldedAffineMin(builder, collapseShape.getLoc(), minMap,
-                                      groupStrides)};
+  return {lastValidStride};
 }
 
 /// From `reshape_like(memref, subSizes, subStrides))` compute
diff --git a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
index 7321b19068016cc..6de744a7f752448 100644
--- a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
+++ b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp
@@ -81,11 +81,10 @@ std::pair<LinearizedMemRefInfo, OpFoldResult> getLinearizedMemRefOffsetAndSize(
 
   // Adjust linearizedIndices and size by the scale factor (dstBits / srcBits).
   int64_t scaler = dstBits / srcBits;
-  addMulMap = addMulMap.floorDiv(scaler);
   mulMap = mulMap.floorDiv(scaler);
 
   OpFoldResult linearizedIndices = affine::makeComposedFoldedAffineApply(
-      builder, loc, addMulMap, offsetValues);
+      builder, loc, addMulMap.floorDiv(scaler), offsetValues);
   OpFoldResult linearizedSize =
       affine::makeComposedFoldedAffineApply(builder, loc, mulMap, sizes);
 
@@ -95,7 +94,11 @@ std::pair<LinearizedMemRefInfo, OpFoldResult> getLinearizedMemRefOffsetAndSize(
   OpFoldResult adjustBaseOffset = affine::makeComposedFoldedAffineApply(
       builder, loc, s0.floorDiv(scaler), {offset});
 
-  return {{adjustBaseOffset, linearizedSize}, linearizedIndices};
+  OpFoldResult intraVectorOffset = affine::makeComposedFoldedAffineApply(
+      builder, loc, addMulMap % scaler, offsetValues);
+
+  return {{adjustBaseOffset, linearizedSize, intraVectorOffset},
+          linearizedIndices};
 }
 
 LinearizedMemRefInfo
diff --git a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
index cf40443ff383906..779c41a22e9ee2b 100644
--- a/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -649,7 +649,8 @@ struct ForOpInterface
     if (failed(bufferizableOp.resolveTensorOpOperandConflicts(rewriter, state)))
       return failure();
 
-    if (!state.getOptions().enforceAliasingInvariants)
+    if (!state.getOptions().enforceAliasingInvariants ||
+        state.getOptions().copyBeforeWrite)
       return success();
 
     // According to the `getAliasing...` implementations, a bufferized OpResult
@@ -889,7 +890,8 @@ struct WhileOpInterface
     if (failed(bufferizableOp.resolveTensorOpOperandConflicts(rewriter, state)))
       return failure();
 
-    if (!state.getOptions().enforceAliasingInvariants)
+    if (!state.getOptions().enforceAliasingInvariants ||
+        state.getOptions().copyBeforeWrite)
       return success();
 
     // According to the `getAliasing...` implementations, a bufferized OpResult
diff --git a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp b/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp
deleted file mode 100644
index 21c618ab633f604..000000000000000
--- a/mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-//===- Bufferize.cpp - scf bufferize pass ---------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/SCF/Transforms/Passes.h"
-
-#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
-#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/SCF/IR/SCF.h"
-#include "mlir/Dialect/SCF/Transforms/Patterns.h"
-#include "mlir/Transforms/DialectConversion.h"
-
-namespace mlir {
-#define GEN_PASS_DEF_SCFBUFFERIZE
-#include "mlir/Dialect/SCF/Transforms/Passes.h.inc"
-} // namespace mlir
-
-using namespace mlir;
-using namespace mlir::scf;
-
-namespace {
-struct SCFBufferizePass : public impl::SCFBufferizeBase<SCFBufferizePass> {
-  void runOnOperation() override {
-    auto *func = getOperation();
-    auto *context = &getContext();
-
-    bufferization::BufferizeTypeConverter typeConverter;
-    RewritePatternSet patterns(context);
-    ConversionTarget target(*context);
-
-    bufferization::populateBufferizeMaterializationLegality(target);
-    populateSCFStructuralTypeConversionsAndLegality(typeConverter, patterns,
-                                                    target);
-    if (failed(applyPartialConversion(func, target, std::move(patterns))))
-      return signalPassFailure();
-  };
-};
-} // namespace
-
-std::unique_ptr<Pass> mlir::createSCFBufferizePass() {
-  return std::make_unique<SCFBufferizePass>();
-}
diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
index 5dc7c60792b9b65..e99b5d0cc26fc7d 100644
--- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
@@ -1,7 +1,6 @@
 add_mlir_dialect_library(MLIRSCFTransforms
   BufferDeallocationOpInterfaceImpl.cpp
   BufferizableOpInterfaceImpl.cpp
-  Bufferize.cpp
   ForallToFor.cpp
   ForallToParallel.cpp
   ForToWhile.cpp
diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
index a30e349d49136c2..5104ad4b3a3038f 100644
--- a/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp
@@ -206,12 +206,11 @@ LogicalResult mlir::scf::peelForLoopAndSimplifyBounds(RewriterBase &rewriter,
   return success();
 }
 
-/// When the `peelFront` option is set as true, the first iteration of the loop
-/// is peeled off. This function rewrites the original scf::ForOp as two
-/// scf::ForOp Ops, the first scf::ForOp corresponds to the first iteration of
-/// the loop which can be canonicalized away in the following optimization. The
-/// second loop Op contains the remaining iteration, and the new lower bound is
-/// the original lower bound plus the number of steps.
+/// Rewrites the original scf::ForOp as two scf::ForOp Ops, the first
+/// scf::ForOp corresponds to the first iteration of the loop which can be
+/// canonicalized away in the following optimizations. The second loop Op
+/// contains the remaining iterations, with a lower bound updated as the
+/// original lower bound plus the step (i.e. skips the first iteration).
 LogicalResult mlir::scf::peelForLoopFirstIteration(RewriterBase &b, ForOp forOp,
                                                    ForOp &firstIteration) {
   RewriterBase::InsertionGuard guard(b);
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index a2abe1619454f26..1853ae04f45d90c 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -2951,11 +2951,11 @@ void InsertOp::getCanonicalizationPatterns(RewritePatternSet &results,
               InsertOpConstantFolder>(context);
 }
 
-// Eliminates insert operations that produce values identical to their source
-// value. This happens when the source and destination vectors have identical
-// sizes.
 OpFoldResult vector::InsertOp::fold(FoldAdaptor adaptor) {
-  if (getNumIndices() == 0)
+  // Fold "vector.insert %v, %dest [] : vector<2x2xf32> into vector<2x2xf32>" to
+  // %v. Note: Do not fold "vector.insert %v, %dest [] : f32 into vector<f32>"
+  // (type mismatch).
+  if (getNumIndices() == 0 && getSourceType() == getType())
     return getSource();
   return {};
 }
@@ -4977,8 +4977,8 @@ LogicalResult MaskedLoadOp::verify() {
     return emitOpError("base and result element type should match");
   if (llvm::size(getIndices()) != memType.getRank())
     return emitOpError("requires ") << memType.getRank() << " indices";
-  if (resVType.getDimSize(0) != maskVType.getDimSize(0))
-    return emitOpError("expected result dim to match mask dim");
+  if (resVType.getShape() != maskVType.getShape())
+    return emitOpError("expected result shape to match mask shape");
   if (resVType != passVType)
     return emitOpError("expected pass_thru of same type as result type");
   return success();
@@ -5030,8 +5030,8 @@ LogicalResult MaskedStoreOp::verify() {
     return emitOpError("base and valueToStore element type should match");
   if (llvm::size(getIndices()) != memType.getRank())
     return emitOpError("requires ") << memType.getRank() << " indices";
-  if (valueVType.getDimSize(0) != maskVType.getDimSize(0))
-    return emitOpError("expected valueToStore dim to match mask dim");
+  if (valueVType.getShape() != maskVType.getShape())
+    return emitOpError("expected valueToStore shape to match mask shape");
   return success();
 }
 
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
index 66362d3ca70fb64..1d6f8a991d9b5b7 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Dialect/Arith/Utils/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
 #include "mlir/IR/BuiltinAttributes.h"
@@ -22,8 +23,10 @@
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdint>
+#include <optional>
 
 using namespace mlir;
 
@@ -33,17 +36,22 @@ using namespace mlir;
 #define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
 
 /// Returns a compressed mask. The mask value is set only if any mask is present
-/// in the scale range. E.g., if `scale` equals to 2, the following mask:
+/// in the scale range. E.g., if `scale` equals 2 and `intraDataOffset`
+/// equals 2, the following mask:
 ///
 ///   %mask = [1, 1, 1, 0, 0, 0]
 ///
-/// will return the following new compressed mask:
+/// will first be padded at the front with `intraDataOffset` zeros:
+///   %mask = [0, 0, 1, 1, 1, 0, 0, 0]
 ///
-///   %mask = [1, 1, 0]
+/// then it will return the following new compressed mask:
+///
+///   %mask = [0, 1, 1, 0]
 static FailureOr<Operation *> getCompressedMaskOp(OpBuilder &rewriter,
                                                   Location loc, Value mask,
-                                                  int origElements, int scale) {
-  auto numElements = (origElements + scale - 1) / scale;
+                                                  int origElements, int scale,
+                                                  int intraDataOffset = 0) {
+  auto numElements = (intraDataOffset + origElements + scale - 1) / scale;
 
   Operation *maskOp = mask.getDefiningOp();
   SmallVector<vector::ExtractOp, 2> extractOps;
@@ -67,6 +75,9 @@ static FailureOr<Operation *> getCompressedMaskOp(OpBuilder &rewriter,
   shape.back() = numElements;
   auto newMaskType = VectorType::get(shape, rewriter.getI1Type());
   if (createMaskOp) {
+    // TODO: handle the case with non-zero intraDataOffset for CreateMaskOp.
+    if (intraDataOffset != 0)
+      return failure();
     OperandRange maskOperands = createMaskOp.getOperands();
     size_t numMaskOperands = maskOperands.size();
     AffineExpr s0;
@@ -86,11 +97,27 @@ static FailureOr<Operation *> getCompressedMaskOp(OpBuilder &rewriter,
     ArrayRef<int64_t> maskDimSizes = constantMaskOp.getMaskDimSizes();
     size_t numMaskOperands = maskDimSizes.size();
     int64_t origIndex = maskDimSizes[numMaskOperands - 1];
-    int64_t maskIndex = (origIndex + scale - 1) / scale;
+    int64_t startIndex = intraDataOffset / scale;
+    int64_t maskIndex = llvm::divideCeil(intraDataOffset + origIndex, scale);
+
+    // TODO: we only want the mask in [startIndex, maskIndex) to be true,
+    // the rest are false.
+    if (intraDataOffset != 0 && maskDimSizes.size() > 1)
+      return failure();
+
     SmallVector<int64_t> newMaskDimSizes(maskDimSizes.drop_back());
     newMaskDimSizes.push_back(maskIndex);
-    newMask = rewriter.create<vector::ConstantMaskOp>(loc, newMaskType,
-                                                      newMaskDimSizes);
+
+    if (intraDataOffset == 0) {
+      newMask = rewriter.create<vector::ConstantMaskOp>(loc, newMaskType,
+                                                        newMaskDimSizes);
+    } else {
+      SmallVector<bool> newMaskValues;
+      for (int64_t i = 0; i < numElements; ++i)
+        newMaskValues.push_back(i >= startIndex && i < maskIndex);
+      auto denseAttr = DenseElementsAttr::get(newMaskType, newMaskValues);
+      newMask = rewriter.create<arith::ConstantOp>(loc, newMaskType, denseAttr);
+    }
   }
 
   while (!extractOps.empty()) {
@@ -102,6 +129,26 @@ static FailureOr<Operation *> getCompressedMaskOp(OpBuilder &rewriter,
   return newMask;
 }
 
+static Value extractSubvectorFrom(RewriterBase &rewriter, Location loc,
+                                  VectorType extractType, Value vector,
+                                  int64_t frontOffset, int64_t subvecSize) {
+  auto offsets = rewriter.getI64ArrayAttr({frontOffset});
+  auto sizes = rewriter.getI64ArrayAttr({subvecSize});
+  auto strides = rewriter.getI64ArrayAttr({1});
+  return rewriter
+      .create<vector::ExtractStridedSliceOp>(loc, extractType, vector, offsets,
+                                             sizes, strides)
+      ->getResult(0);
+}
+
+static Value insertSubvectorInto(RewriterBase &rewriter, Location loc,
+                                 Value src, Value dest, int64_t offset) {
+  auto offsets = rewriter.getI64ArrayAttr({offset});
+  auto strides = rewriter.getI64ArrayAttr({1});
+  return rewriter.create<vector::InsertStridedSliceOp>(loc, dest.getType(), src,
+                                                       dest, offsets, strides);
+}
+
 namespace {
 
 //===----------------------------------------------------------------------===//
@@ -201,7 +248,8 @@ struct ConvertVectorMaskedStore final
     auto stridedMetadata =
         rewriter.create<memref::ExtractStridedMetadataOp>(loc, op.getBase());
     OpFoldResult linearizedIndicesOfr;
-    std::tie(std::ignore, linearizedIndicesOfr) =
+    memref::LinearizedMemRefInfo linearizedInfo;
+    std::tie(linearizedInfo, linearizedIndicesOfr) =
         memref::getLinearizedMemRefOffsetAndSize(
             rewriter, loc, srcBits, dstBits,
             stridedMetadata.getConstifiedMixedOffset(),
@@ -214,19 +262,19 @@ struct ConvertVectorMaskedStore final
     // Load the whole data and use arith.select to handle the corner cases.
     // E.g., given these input values:
     //
-    //   %mask = [1, 1, 1, 0, 0, 0]
-    //   %0[%c0, %c0] contains [0x1, 0x2, 0x3, 0x4, 0x5, 0x6]
-    //   %value_to_store = [0x7, 0x8, 0x9, 0xA, 0xB, 0xC]
+    //   %mask = [0, 1, 1, 1, 1, 1, 0, 0]
+    //   %0[%c0, %c0] contains [0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8]
+    //   %value_to_store = [0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0]
     //
     // we'll have
     //
-    //    expected output: [0x7, 0x8, 0x9, 0x4, 0x5, 0x6]
+    //    expected output: [0x1, 0xA, 0xB, 0xC, 0xD, 0xE, 0x7, 0x8]
     //
-    //    %new_mask = [1, 1, 0]
-    //    %maskedload = [0x12, 0x34, 0x0]
-    //    %bitcast = [0x1, 0x2, 0x3, 0x4, 0x0, 0x0]
-    //    %select_using_original_mask = [0x7, 0x8, 0x9, 0x4, 0x0, 0x0]
-    //    %packed_data = [0x78, 0x94, 0x00]
+    //    %new_mask = [1, 1, 1, 0]
+    //    %maskedload = [0x12, 0x34, 0x56, 0x00]
+    //    %bitcast = [0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x0, 0x0]
+    //    %select_using_shifted_mask = [0x1, 0xA, 0xB, 0xC, 0xD, 0xE, 0x0, 0x0]
+    //    %packed_data = [0x1A, 0xBC, 0xDE, 0x00]
     //
     // Using the new mask to store %packed_data results in expected output.
     FailureOr<Operation *> newMask =
@@ -243,8 +291,9 @@ struct ConvertVectorMaskedStore final
         loc, newType, adaptor.getBase(), linearizedIndices,
         newMask.value()->getResult(0), passThru);
 
-    Value valueToStore = rewriter.create<vector::BitCastOp>(
-        loc, op.getValueToStore().getType(), newLoad);
+    auto newBitCastType = VectorType::get(numElements * scale, oldElementType);
+    Value valueToStore =
+        rewriter.create<vector::BitCastOp>(loc, newBitCastType, newLoad);
     valueToStore = rewriter.create<arith::SelectOp>(
         loc, op.getMask(), op.getValueToStore(), valueToStore);
     valueToStore =
@@ -294,19 +343,31 @@ struct ConvertVectorLoad final : OpConversionPattern<vector::LoadOp> {
     // %1 = vector.load %0[%linear_index] : memref<6xi8>, vector<2xi8>
     // %2 = vector.bitcast %1 : vector<2xi8> to vector<4xi4>
     //
-    // TODO: Currently, only the even number of elements loading is supported.
-    // To deal with the odd number of elements, one has to extract the
-    // subvector at the proper offset after bit-casting.
+    // There are cases where the number of elements to load is not byte-aligned,
+    // for example:
+    //
+    // %1 = vector.load %0[%c1, %c0] : memref<3x3xi2>, vector<3xi2>
+    //
+    // we will have to load extra bytes and extract the exact slice in between.
+    //
+    // %1 = vector.load %0[%c2] : memref<3xi8>, vector<2xi8>
+    // %2 = vector.bitcast %1 : vector<2xi8> to vector<8xi2>
+    // %3 = vector.extract_strided_slice %2 {offsets = [2], sizes = [3], strides
+    // = [1]}
+    //        : vector<8xi2> to vector<3xi2>
+    //
+    // TODO: Currently the extract_strided_slice's attributes must be known at
+    // compile time as they must be constants.
 
     auto origElements = op.getVectorType().getNumElements();
-    if (origElements % scale != 0)
-      return failure();
+    bool isUnalignedEmulation = origElements % scale != 0;
 
     auto stridedMetadata =
         rewriter.create<memref::ExtractStridedMetadataOp>(loc, op.getBase());
 
     OpFoldResult linearizedIndices;
-    std::tie(std::ignore, linearizedIndices) =
+    memref::LinearizedMemRefInfo linearizedInfo;
+    std::tie(linearizedInfo, linearizedIndices) =
         memref::getLinearizedMemRefOffsetAndSize(
             rewriter, loc, srcBits, dstBits,
             stridedMetadata.getConstifiedMixedOffset(),
@@ -314,15 +375,31 @@ struct ConvertVectorLoad final : OpConversionPattern<vector::LoadOp> {
             stridedMetadata.getConstifiedMixedStrides(),
             getAsOpFoldResult(adaptor.getIndices()));
 
-    auto numElements = (origElements + scale - 1) / scale;
+    std::optional<int64_t> foldedIntraVectorOffset =
+        isUnalignedEmulation
+            ? getConstantIntValue(linearizedInfo.intraDataOffset)
+            : 0;
+
+    if (!foldedIntraVectorOffset) {
+      // unimplemented case for dynamic intra vector offset
+      return failure();
+    }
+
+    auto numElements =
+        llvm::divideCeil(*foldedIntraVectorOffset + origElements, scale);
     auto newLoad = rewriter.create<vector::LoadOp>(
         loc, VectorType::get(numElements, newElementType), adaptor.getBase(),
         getValueOrCreateConstantIndexOp(rewriter, loc, linearizedIndices));
 
-    auto bitCast =
-        rewriter.create<vector::BitCastOp>(loc, op.getType(), newLoad);
+    Value result = rewriter.create<vector::BitCastOp>(
+        loc, VectorType::get(numElements * scale, oldElementType), newLoad);
 
-    rewriter.replaceOp(op, bitCast->getResult(0));
+    if (isUnalignedEmulation) {
+      result = extractSubvectorFrom(rewriter, loc, op.getType(), result,
+                                    *foldedIntraVectorOffset, origElements);
+    }
+
+    rewriter.replaceOp(op, result);
     return success();
   }
 };
@@ -396,13 +473,13 @@ struct ConvertVectorMaskedLoad final
     // subvector at the proper offset after bit-casting.
     auto origType = op.getVectorType();
     auto origElements = origType.getNumElements();
-    if (origElements % scale != 0)
-      return failure();
+    bool isUnalignedEmulation = origElements % scale != 0;
 
     auto stridedMetadata =
         rewriter.create<memref::ExtractStridedMetadataOp>(loc, op.getBase());
     OpFoldResult linearizedIndices;
-    std::tie(std::ignore, linearizedIndices) =
+    memref::LinearizedMemRefInfo linearizedInfo;
+    std::tie(linearizedInfo, linearizedIndices) =
         memref::getLinearizedMemRefOffsetAndSize(
             rewriter, loc, srcBits, dstBits,
             stridedMetadata.getConstifiedMixedOffset(),
@@ -410,29 +487,68 @@ struct ConvertVectorMaskedLoad final
             stridedMetadata.getConstifiedMixedStrides(),
             getAsOpFoldResult(adaptor.getIndices()));
 
+    std::optional<int64_t> foldedIntraVectorOffset =
+        isUnalignedEmulation
+            ? getConstantIntValue(linearizedInfo.intraDataOffset)
+            : 0;
+
+    if (!foldedIntraVectorOffset) {
+      // unimplemented case for dynamic intra vector offset
+      return failure();
+    }
+
     FailureOr<Operation *> newMask =
-        getCompressedMaskOp(rewriter, loc, op.getMask(), origElements, scale);
+        getCompressedMaskOp(rewriter, loc, op.getMask(), origElements, scale,
+                            *foldedIntraVectorOffset);
     if (failed(newMask))
       return failure();
 
-    auto numElements = (origElements + scale - 1) / scale;
-    auto newType = VectorType::get(numElements, newElementType);
+    auto numElements =
+        llvm::divideCeil(*foldedIntraVectorOffset + origElements, scale);
+    auto loadType = VectorType::get(numElements, newElementType);
+    auto newBitcastType = VectorType::get(numElements * scale, oldElementType);
+
+    Value passthru = op.getPassThru();
+    if (isUnalignedEmulation) {
+      // create an empty vector of the new type
+      auto emptyVector = rewriter.create<arith::ConstantOp>(
+          loc, newBitcastType, rewriter.getZeroAttr(newBitcastType));
+      passthru = insertSubvectorInto(rewriter, loc, passthru, emptyVector,
+                                     *foldedIntraVectorOffset);
+    }
     auto newPassThru =
-        rewriter.create<vector::BitCastOp>(loc, newType, op.getPassThru());
+        rewriter.create<vector::BitCastOp>(loc, loadType, passthru);
 
     // Generating the new masked load.
     auto newLoad = rewriter.create<vector::MaskedLoadOp>(
-        loc, newType, adaptor.getBase(),
+        loc, loadType, adaptor.getBase(),
         getValueOrCreateConstantIndexOp(rewriter, loc, linearizedIndices),
         newMask.value()->getResult(0), newPassThru);
 
     // Setting the part that originally was not effectively loaded from memory
     // to pass through.
     auto bitCast =
-        rewriter.create<vector::BitCastOp>(loc, op.getType(), newLoad);
-    auto select = rewriter.create<arith::SelectOp>(loc, op.getMask(), bitCast,
-                                                   op.getPassThru());
-    rewriter.replaceOp(op, select->getResult(0));
+        rewriter.create<vector::BitCastOp>(loc, newBitcastType, newLoad);
+
+    Value mask = op.getMask();
+    if (isUnalignedEmulation) {
+      auto newSelectMaskType =
+          VectorType::get(numElements * scale, rewriter.getI1Type());
+      // TODO: can fold if op's mask is constant
+      auto emptyVector = rewriter.create<arith::ConstantOp>(
+          loc, newSelectMaskType, rewriter.getZeroAttr(newSelectMaskType));
+      mask = insertSubvectorInto(rewriter, loc, op.getMask(), emptyVector,
+                                 *foldedIntraVectorOffset);
+    }
+
+    Value result =
+        rewriter.create<arith::SelectOp>(loc, mask, bitCast, passthru);
+
+    if (isUnalignedEmulation) {
+      result = extractSubvectorFrom(rewriter, loc, op.getType(), result,
+                                    *foldedIntraVectorOffset, origElements);
+    }
+    rewriter.replaceOp(op, result);
 
     return success();
   }
@@ -464,8 +580,8 @@ struct ConvertVectorTransferRead final
     int scale = dstBits / srcBits;
 
     auto origElements = op.getVectorType().getNumElements();
-    if (origElements % scale != 0)
-      return failure();
+
+    bool isUnalignedEmulation = origElements % scale != 0;
 
     auto newPadding = rewriter.create<arith::ExtUIOp>(loc, newElementType,
                                                       adaptor.getPadding());
@@ -474,7 +590,8 @@ struct ConvertVectorTransferRead final
         rewriter.create<memref::ExtractStridedMetadataOp>(loc, op.getSource());
 
     OpFoldResult linearizedIndices;
-    std::tie(std::ignore, linearizedIndices) =
+    memref::LinearizedMemRefInfo linearizedInfo;
+    std::tie(linearizedInfo, linearizedIndices) =
         memref::getLinearizedMemRefOffsetAndSize(
             rewriter, loc, srcBits, dstBits,
             stridedMetadata.getConstifiedMixedOffset(),
@@ -482,18 +599,34 @@ struct ConvertVectorTransferRead final
             stridedMetadata.getConstifiedMixedStrides(),
             getAsOpFoldResult(adaptor.getIndices()));
 
-    auto numElements = (origElements + scale - 1) / scale;
-    auto newReadType = VectorType::get(numElements, newElementType);
+    std::optional<int64_t> foldedIntraVectorOffset =
+        isUnalignedEmulation
+            ? getConstantIntValue(linearizedInfo.intraDataOffset)
+            : 0;
+
+    if (!foldedIntraVectorOffset) {
+      // unimplemented case for dynamic intra-vector offset
+      return failure();
+    }
+
+    auto numElements =
+        llvm::divideCeil(*foldedIntraVectorOffset + origElements, scale);
 
     auto newRead = rewriter.create<vector::TransferReadOp>(
-        loc, newReadType, adaptor.getSource(),
+        loc, VectorType::get(numElements, newElementType), adaptor.getSource(),
         getValueOrCreateConstantIndexOp(rewriter, loc, linearizedIndices),
         newPadding);
 
-    auto bitCast =
-        rewriter.create<vector::BitCastOp>(loc, op.getType(), newRead);
+    auto bitCast = rewriter.create<vector::BitCastOp>(
+        loc, VectorType::get(numElements * scale, oldElementType), newRead);
+
+    Value result = bitCast->getResult(0);
+    if (isUnalignedEmulation) {
+      result = extractSubvectorFrom(rewriter, loc, op.getType(), result,
+                                    *foldedIntraVectorOffset, origElements);
+    }
+    rewriter.replaceOp(op, result);
 
-    rewriter.replaceOp(op, bitCast->getResult(0));
     return success();
   }
 };
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
index e05c801121ffc44..3a30382114c8dc7 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
@@ -354,11 +354,13 @@ namespace {
 /// inserting a memref.subview dropping those unit dims. The vector shapes are
 /// also reduced accordingly.
 class TransferReadDropUnitDimsPattern
-    : public OpRewritePattern<vector::TransferReadOp> {
-  using OpRewritePattern::OpRewritePattern;
+    : public vector::MaskableOpRewritePattern<vector::TransferReadOp> {
+  using MaskableOpRewritePattern::MaskableOpRewritePattern;
 
-  LogicalResult matchAndRewrite(vector::TransferReadOp transferReadOp,
-                                PatternRewriter &rewriter) const override {
+  FailureOr<Value>
+  matchAndRewriteMaskableOp(vector::TransferReadOp transferReadOp,
+                            vector::MaskingOpInterface maskingOp,
+                            PatternRewriter &rewriter) const override {
     auto loc = transferReadOp.getLoc();
     Value vector = transferReadOp.getVector();
     VectorType vectorType = cast<VectorType>(vector.getType());
@@ -376,6 +378,10 @@ class TransferReadDropUnitDimsPattern
     int reducedRank = getReducedRank(sourceType.getShape());
     if (reducedRank == sourceType.getRank())
       return failure();
+    // TODO: Extend vector.mask to support 0-d vectors. In the meantime, bail
+    // out.
+    if (reducedRank == 0 && maskingOp)
+      return failure();
     // Check if the reduced vector shape matches the reduced source shape.
     // Otherwise, this case is not supported yet.
     VectorType reducedVectorType = trimNonScalableUnitDims(vectorType);
@@ -406,15 +412,23 @@ class TransferReadDropUnitDimsPattern
     SmallVector<Value> zeros(reducedRank, c0);
     auto identityMap = rewriter.getMultiDimIdentityMap(reducedRank);
     SmallVector<bool> inBounds(reducedVectorType.getRank(), true);
-    auto newTransferReadOp = rewriter.create<vector::TransferReadOp>(
+    Operation *newTransferReadOp = rewriter.create<vector::TransferReadOp>(
         loc, reducedVectorType, reducedShapeSource, zeros, identityMap,
         transferReadOp.getPadding(), maskOp,
         rewriter.getBoolArrayAttr(inBounds));
+
+    if (maskingOp) {
+      auto shapeCastMask = rewriter.createOrFold<vector::ShapeCastOp>(
+          loc, reducedVectorType.cloneWith(std::nullopt, rewriter.getI1Type()),
+          maskingOp.getMask());
+      newTransferReadOp = mlir::vector::maskOperation(
+          rewriter, newTransferReadOp, shapeCastMask);
+    }
+
     auto shapeCast = rewriter.createOrFold<vector::ShapeCastOp>(
-        loc, vectorType, newTransferReadOp);
-    rewriter.replaceOp(transferReadOp, shapeCast);
+        loc, vectorType, newTransferReadOp->getResults()[0]);
 
-    return success();
+    return shapeCast;
   }
 };
 
@@ -422,11 +436,13 @@ class TransferReadDropUnitDimsPattern
 /// has unit dims, by inserting a `memref.subview` dropping those unit dims. The
 /// vector shapes are also reduced accordingly.
 class TransferWriteDropUnitDimsPattern
-    : public OpRewritePattern<vector::TransferWriteOp> {
-  using OpRewritePattern::OpRewritePattern;
+    : public vector::MaskableOpRewritePattern<vector::TransferWriteOp> {
+  using MaskableOpRewritePattern::MaskableOpRewritePattern;
 
-  LogicalResult matchAndRewrite(vector::TransferWriteOp transferWriteOp,
-                                PatternRewriter &rewriter) const override {
+  FailureOr<Value>
+  matchAndRewriteMaskableOp(vector::TransferWriteOp transferWriteOp,
+                            vector::MaskingOpInterface maskingOp,
+                            PatternRewriter &rewriter) const override {
     auto loc = transferWriteOp.getLoc();
     Value vector = transferWriteOp.getVector();
     VectorType vectorType = cast<VectorType>(vector.getType());
@@ -444,6 +460,10 @@ class TransferWriteDropUnitDimsPattern
     int reducedRank = getReducedRank(sourceType.getShape());
     if (reducedRank == sourceType.getRank())
       return failure();
+    // TODO: Extend vector.mask to support 0-d vectors. In the meantime, bail
+    // out.
+    if (reducedRank == 0 && maskingOp)
+      return failure();
     // Check if the reduced vector shape matches the reduced destination shape.
     // Otherwise, this case is not supported yet.
     VectorType reducedVectorType = trimNonScalableUnitDims(vectorType);
@@ -474,13 +494,26 @@ class TransferWriteDropUnitDimsPattern
     SmallVector<Value> zeros(reducedRank, c0);
     auto identityMap = rewriter.getMultiDimIdentityMap(reducedRank);
     SmallVector<bool> inBounds(reducedVectorType.getRank(), true);
-    auto shapeCast = rewriter.createOrFold<vector::ShapeCastOp>(
+    auto shapeCastSrc = rewriter.createOrFold<vector::ShapeCastOp>(
         loc, reducedVectorType, vector);
-    rewriter.replaceOpWithNewOp<vector::TransferWriteOp>(
-        transferWriteOp, Type(), shapeCast, reducedShapeSource, zeros,
-        identityMap, maskOp, rewriter.getBoolArrayAttr(inBounds));
+    Operation *newXferWrite = rewriter.create<vector::TransferWriteOp>(
+        loc, Type(), shapeCastSrc, reducedShapeSource, zeros, identityMap,
+        maskOp, rewriter.getBoolArrayAttr(inBounds));
+
+    if (maskingOp) {
+      auto shapeCastMask = rewriter.createOrFold<vector::ShapeCastOp>(
+          loc, reducedVectorType.cloneWith(std::nullopt, rewriter.getI1Type()),
+          maskingOp.getMask());
+      newXferWrite =
+          mlir::vector::maskOperation(rewriter, newXferWrite, shapeCastMask);
+    }
 
-    return success();
+    if (transferWriteOp.hasPureTensorSemantics())
+      return newXferWrite->getResults()[0];
+
+    // With Memref semantics, there's no return value. Use empty value to signal
+    // success.
+    return Value();
   }
 };
 
diff --git a/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp b/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp
index ec9ed87723e1cc5..a2acf3e732adab0 100644
--- a/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp
+++ b/mlir/lib/Interfaces/Utils/InferIntRangeCommon.cpp
@@ -298,8 +298,14 @@ static ConstantIntRanges inferDivURange(const ConstantIntRanges &lhs,
     return minMaxBy(udiv, {lhsMin, lhsMax}, {rhsMin, rhsMax},
                     /*isSigned=*/false);
   }
-  // Otherwise, it's possible we might divide by 0.
-  return ConstantIntRanges::maxRange(rhsMin.getBitWidth());
+
+  APInt umin = APInt::getZero(rhsMin.getBitWidth());
+  if (lhsMin.uge(rhsMax) && !rhsMax.isZero())
+    umin = lhsMin.udiv(rhsMax);
+
+  // X u/ Y u<= X.
+  APInt umax = lhsMax;
+  return ConstantIntRanges::fromUnsigned(umin, umax);
 }
 
 ConstantIntRanges
diff --git a/mlir/lib/Query/QueryParser.cpp b/mlir/lib/Query/QueryParser.cpp
index 13ee931cc5227fe..31aead7d403d0df 100644
--- a/mlir/lib/Query/QueryParser.cpp
+++ b/mlir/lib/Query/QueryParser.cpp
@@ -181,8 +181,8 @@ QueryRef QueryParser::doParse() {
     if (!matcher) {
       return makeInvalidQueryFromDiagnostics(diag);
     }
-    auto actualSource = origMatcherSource.slice(0, origMatcherSource.size() -
-                                                       matcherSource.size());
+    auto actualSource = origMatcherSource.substr(0, origMatcherSource.size() -
+                                                        matcherSource.size());
     QueryRef query = new MatchQuery(actualSource, *matcher);
     query->remainingContent = matcherSource;
     return query;
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 27cd38dc3c62d95..d20e5e40076bc3f 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -137,9 +137,9 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder,
 /// region, and a branch from any block with an successor-less OpenMP terminator
 /// to `continuationBlock`. Populates `continuationBlockPHIs` with the PHI nodes
 /// of the continuation block if provided.
-static llvm::BasicBlock *convertOmpOpRegions(
+static llvm::Expected<llvm::BasicBlock *> convertOmpOpRegions(
     Region &region, StringRef blockName, llvm::IRBuilderBase &builder,
-    LLVM::ModuleTranslation &moduleTranslation, LogicalResult &bodyGenStatus,
+    LLVM::ModuleTranslation &moduleTranslation,
     SmallVectorImpl<llvm::PHINode *> *continuationBlockPHIs = nullptr) {
   llvm::BasicBlock *continuationBlock =
       splitBB(builder, true, "omp.region.cont");
@@ -215,10 +215,8 @@ static llvm::BasicBlock *convertOmpOpRegions(
 
     llvm::IRBuilderBase::InsertPointGuard guard(builder);
     if (failed(
-            moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder))) {
-      bodyGenStatus = failure();
-      return continuationBlock;
-    }
+            moduleTranslation.convertBlock(*bb, bb->isEntryBlock(), builder)))
+      return llvm::createStringError("failed region translation");
 
     // Special handling for `omp.yield` and `omp.terminator` (we may have more
     // than one): they return the control to the parent OpenMP dialect operation
@@ -264,27 +262,81 @@ static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
   llvm_unreachable("Unknown ClauseProcBindKind kind");
 }
 
+/// Helper function to map block arguments defined by ignored loop wrappers to
+/// LLVM values and prevent any uses of those from triggering null pointer
+/// dereferences.
+///
+/// This must be called after block arguments of parent wrappers have already
+/// been mapped to LLVM IR values.
+static LogicalResult
+convertIgnoredWrapper(omp::LoopWrapperInterface &opInst,
+                      LLVM::ModuleTranslation &moduleTranslation) {
+  // Map block arguments directly to the LLVM value associated to the
+  // corresponding operand. This is semantically equivalent to this wrapper not
+  // being present.
+  auto forwardArgs =
+      [&moduleTranslation](llvm::ArrayRef<BlockArgument> blockArgs,
+                           OperandRange operands) {
+        for (auto [arg, var] : llvm::zip_equal(blockArgs, operands))
+          moduleTranslation.mapValue(arg, moduleTranslation.lookupValue(var));
+      };
+
+  return llvm::TypeSwitch<Operation *, LogicalResult>(opInst)
+      .Case([&](omp::SimdOp op) {
+        auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(*op);
+        forwardArgs(blockArgIface.getPrivateBlockArgs(), op.getPrivateVars());
+        forwardArgs(blockArgIface.getReductionBlockArgs(),
+                    op.getReductionVars());
+        return success();
+      })
+      .Default([&](Operation *op) {
+        return op->emitError() << "cannot ignore nested wrapper";
+      });
+}
+
+/// Helper function to call \c convertIgnoredWrapper() for all wrappers of the
+/// given \c loopOp nested inside of \c parentOp. This has the effect of mapping
+/// entry block arguments defined by these operations to outside values.
+///
+/// It must be called after block arguments of \c parentOp have already been
+/// mapped themselves.
+static LogicalResult
+convertIgnoredWrappers(omp::LoopNestOp loopOp,
+                       omp::LoopWrapperInterface parentOp,
+                       LLVM::ModuleTranslation &moduleTranslation) {
+  SmallVector<omp::LoopWrapperInterface> wrappers;
+  loopOp.gatherWrappers(wrappers);
+
+  // Process wrappers nested inside of `parentOp` from outermost to innermost.
+  for (auto it =
+           std::next(std::find(wrappers.rbegin(), wrappers.rend(), parentOp));
+       it != wrappers.rend(); ++it) {
+    if (failed(convertIgnoredWrapper(*it, moduleTranslation)))
+      return failure();
+  }
+
+  return success();
+}
+
 /// Converts an OpenMP 'masked' operation into LLVM IR using OpenMPIRBuilder.
 static LogicalResult
 convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
   auto maskedOp = cast<omp::MaskedOp>(opInst);
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
-  // relying on captured variables.
-  LogicalResult bodyGenStatus = success();
 
   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
     // MaskedOp has only one region associated with it.
     auto &region = maskedOp.getRegion();
     builder.restoreIP(codeGenIP);
-    convertOmpOpRegions(region, "omp.masked.region", builder, moduleTranslation,
-                        bodyGenStatus);
+    return convertOmpOpRegions(region, "omp.masked.region", builder,
+                               moduleTranslation)
+        .takeError();
   };
 
   // TODO: Perform finalization actions for variables. This has to be
   // called for variables which have destructors/finalizers.
-  auto finiCB = [&](InsertPointTy codeGenIP) {};
+  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
 
   llvm::Value *filterVal = nullptr;
   if (auto filterVar = maskedOp.getFilteredThreadId()) {
@@ -296,8 +348,14 @@ convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
   }
   assert(filterVal != nullptr);
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMasked(
-      ompLoc, bodyGenCB, finiCB, filterVal));
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      moduleTranslation.getOpenMPBuilder()->createMasked(ompLoc, bodyGenCB,
+                                                         finiCB, filterVal);
+
+  if (!afterIP)
+    return opInst.emitError(llvm::toString(afterIP.takeError()));
+
+  builder.restoreIP(*afterIP);
   return success();
 }
 
@@ -306,25 +364,28 @@ static LogicalResult
 convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
-  // relying on captured variables.
-  LogicalResult bodyGenStatus = success();
-
   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
     // MasterOp has only one region associated with it.
     auto &region = cast<omp::MasterOp>(opInst).getRegion();
     builder.restoreIP(codeGenIP);
-    convertOmpOpRegions(region, "omp.master.region", builder, moduleTranslation,
-                        bodyGenStatus);
+    return convertOmpOpRegions(region, "omp.master.region", builder,
+                               moduleTranslation)
+        .takeError();
   };
 
   // TODO: Perform finalization actions for variables. This has to be
   // called for variables which have destructors/finalizers.
-  auto finiCB = [&](InsertPointTy codeGenIP) {};
+  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
 
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createMaster(
-      ompLoc, bodyGenCB, finiCB));
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      moduleTranslation.getOpenMPBuilder()->createMaster(ompLoc, bodyGenCB,
+                                                         finiCB);
+
+  if (!afterIP)
+    return opInst.emitError(llvm::toString(afterIP.takeError()));
+
+  builder.restoreIP(*afterIP);
   return success();
 }
 
@@ -334,21 +395,19 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
                    LLVM::ModuleTranslation &moduleTranslation) {
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
   auto criticalOp = cast<omp::CriticalOp>(opInst);
-  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
-  // relying on captured variables.
-  LogicalResult bodyGenStatus = success();
 
   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
     // CriticalOp has only one region associated with it.
     auto &region = cast<omp::CriticalOp>(opInst).getRegion();
     builder.restoreIP(codeGenIP);
-    convertOmpOpRegions(region, "omp.critical.region", builder,
-                        moduleTranslation, bodyGenStatus);
+    return convertOmpOpRegions(region, "omp.critical.region", builder,
+                               moduleTranslation)
+        .takeError();
   };
 
   // TODO: Perform finalization actions for variables. This has to be
   // called for variables which have destructors/finalizers.
-  auto finiCB = [&](InsertPointTy codeGenIP) {};
+  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
 
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::LLVMContext &llvmContext = moduleTranslation.getLLVMContext();
@@ -366,8 +425,14 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
         llvm::ConstantInt::get(llvm::Type::getInt32Ty(llvmContext),
                                static_cast<int>(criticalDeclareOp.getHint()));
   }
-  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createCritical(
-      ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint));
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      moduleTranslation.getOpenMPBuilder()->createCritical(
+          ompLoc, bodyGenCB, finiCB, criticalOp.getName().value_or(""), hint);
+
+  if (!afterIP)
+    return opInst.emitError(llvm::toString(afterIP.takeError()));
+
+  builder.restoreIP(*afterIP);
   return success();
 }
 
@@ -468,27 +533,30 @@ static LogicalResult inlineConvertOmpRegions(
     return success();
   }
 
-  LogicalResult bodyGenStatus = success();
   SmallVector<llvm::PHINode *> phis;
-  llvm::BasicBlock *continuationBlock = convertOmpOpRegions(
-      region, blockName, builder, moduleTranslation, bodyGenStatus, &phis);
-  if (failed(bodyGenStatus))
-    return failure();
+  llvm::Expected<llvm::BasicBlock *> continuationBlock =
+      convertOmpOpRegions(region, blockName, builder, moduleTranslation, &phis);
+
+  if (!continuationBlock)
+    return region.getParentOp()->emitError(
+        llvm::toString(continuationBlock.takeError()));
+
   if (continuationBlockArgs)
     llvm::append_range(*continuationBlockArgs, phis);
-  builder.SetInsertPoint(continuationBlock,
-                         continuationBlock->getFirstInsertionPt());
+  builder.SetInsertPoint(*continuationBlock,
+                         (*continuationBlock)->getFirstInsertionPt());
   return success();
 }
 
 namespace {
 /// Owning equivalents of OpenMPIRBuilder::(Atomic)ReductionGen that are used to
 /// store lambdas with capture.
-using OwningReductionGen = std::function<llvm::OpenMPIRBuilder::InsertPointTy(
-    llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
-    llvm::Value *&)>;
+using OwningReductionGen =
+    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
+        llvm::OpenMPIRBuilder::InsertPointTy, llvm::Value *, llvm::Value *,
+        llvm::Value *&)>;
 using OwningAtomicReductionGen =
-    std::function<llvm::OpenMPIRBuilder::InsertPointTy(
+    std::function<llvm::OpenMPIRBuilder::InsertPointOrErrorTy(
         llvm::OpenMPIRBuilder::InsertPointTy, llvm::Type *, llvm::Value *,
         llvm::Value *)>;
 } // namespace
@@ -505,19 +573,20 @@ makeReductionGen(omp::DeclareReductionOp decl, llvm::IRBuilderBase &builder,
   OwningReductionGen gen =
       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint,
                 llvm::Value *lhs, llvm::Value *rhs,
-                llvm::Value *&result) mutable {
-        moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
-        moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
-        builder.restoreIP(insertPoint);
-        SmallVector<llvm::Value *> phis;
-        if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
-                                           "omp.reduction.nonatomic.body",
-                                           builder, moduleTranslation, &phis)))
-          return llvm::OpenMPIRBuilder::InsertPointTy();
-        assert(phis.size() == 1);
-        result = phis[0];
-        return builder.saveIP();
-      };
+                llvm::Value *&result) mutable
+      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
+    moduleTranslation.mapValue(decl.getReductionLhsArg(), lhs);
+    moduleTranslation.mapValue(decl.getReductionRhsArg(), rhs);
+    builder.restoreIP(insertPoint);
+    SmallVector<llvm::Value *> phis;
+    if (failed(inlineConvertOmpRegions(decl.getReductionRegion(),
+                                       "omp.reduction.nonatomic.body", builder,
+                                       moduleTranslation, &phis)))
+      return llvm::createStringError("failed reduction region translation");
+    assert(phis.size() == 1);
+    result = phis[0];
+    return builder.saveIP();
+  };
   return gen;
 }
 
@@ -537,18 +606,19 @@ makeAtomicReductionGen(omp::DeclareReductionOp decl,
   // avoid the dangling reference after the parent function returns.
   OwningAtomicReductionGen atomicGen =
       [&, decl](llvm::OpenMPIRBuilder::InsertPointTy insertPoint, llvm::Type *,
-                llvm::Value *lhs, llvm::Value *rhs) mutable {
-        moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
-        moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
-        builder.restoreIP(insertPoint);
-        SmallVector<llvm::Value *> phis;
-        if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
-                                           "omp.reduction.atomic.body", builder,
-                                           moduleTranslation, &phis)))
-          return llvm::OpenMPIRBuilder::InsertPointTy();
-        assert(phis.empty());
-        return builder.saveIP();
-      };
+                llvm::Value *lhs, llvm::Value *rhs) mutable
+      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
+    moduleTranslation.mapValue(decl.getAtomicReductionLhsArg(), lhs);
+    moduleTranslation.mapValue(decl.getAtomicReductionRhsArg(), rhs);
+    builder.restoreIP(insertPoint);
+    SmallVector<llvm::Value *> phis;
+    if (failed(inlineConvertOmpRegions(decl.getAtomicReductionRegion(),
+                                       "omp.reduction.atomic.body", builder,
+                                       moduleTranslation, &phis)))
+      return llvm::createStringError("failed reduction region translation");
+    assert(phis.empty());
+    return builder.saveIP();
+  };
   return atomicGen;
 }
 
@@ -593,27 +663,29 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
   if (orderedRegionOp.getParLevelSimd())
     return failure();
 
-  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
-  // relying on captured variables.
-  LogicalResult bodyGenStatus = success();
-
   auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
     // OrderedOp has only one region associated with it.
     auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
     builder.restoreIP(codeGenIP);
-    convertOmpOpRegions(region, "omp.ordered.region", builder,
-                        moduleTranslation, bodyGenStatus);
+    return convertOmpOpRegions(region, "omp.ordered.region", builder,
+                               moduleTranslation)
+        .takeError();
   };
 
   // TODO: Perform finalization actions for variables. This has to be
   // called for variables which have destructors/finalizers.
-  auto finiCB = [&](InsertPointTy codeGenIP) {};
+  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
 
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  builder.restoreIP(
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       moduleTranslation.getOpenMPBuilder()->createOrderedThreadsSimd(
-          ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd()));
-  return bodyGenStatus;
+          ompLoc, bodyGenCB, finiCB, !orderedRegionOp.getParLevelSimd());
+
+  if (!afterIP)
+    return opInst.emitError(llvm::toString(afterIP.takeError()));
+
+  builder.restoreIP(*afterIP);
+  return success();
 }
 
 namespace {
@@ -811,15 +883,24 @@ static LogicalResult createReductionsAndCleanup(
   // and remove it later.
   llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
   builder.SetInsertPoint(tempTerminator);
-  llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
       ompBuilder->createReductions(builder.saveIP(), allocaIP, reductionInfos,
                                    isByRef, op.getNowait());
-  if (!contInsertPoint.getBlock())
+
+  if (!contInsertPoint)
+    return op.emitError(llvm::toString(contInsertPoint.takeError()));
+
+  if (!contInsertPoint->getBlock())
     return op->emitOpError() << "failed to convert reductions";
-  auto nextInsertionPoint =
-      ompBuilder->createBarrier(contInsertPoint, llvm::omp::OMPD_for);
+
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      ompBuilder->createBarrier(*contInsertPoint, llvm::omp::OMPD_for);
+
+  if (!afterIP)
+    return op.emitError(llvm::toString(afterIP.takeError()));
+
   tempTerminator->eraseFromParent();
-  builder.restoreIP(nextInsertionPoint);
+  builder.restoreIP(*afterIP);
 
   // after the construct, deallocate private reduction variables
   SmallVector<Region *> reductionRegions;
@@ -958,7 +1039,6 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
   LLVM::ModuleTranslation::SaveStack<OpenMPVarMappingStackFrame> mappingGuard(
       moduleTranslation, reductionVariableMap);
 
-  LogicalResult bodyGenStatus = success();
   SmallVector<StorableBodyGenCallbackTy> sectionCBs;
 
   for (Operation &op : *sectionsOp.getRegion().begin()) {
@@ -967,9 +1047,8 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
       continue;
 
     Region &region = sectionOp.getRegion();
-    auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation,
-                      &bodyGenStatus](InsertPointTy allocaIP,
-                                      InsertPointTy codeGenIP) {
+    auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
+                         InsertPointTy allocaIP, InsertPointTy codeGenIP) {
       builder.restoreIP(codeGenIP);
 
       // map the omp.section reduction block argument to the omp.sections block
@@ -985,8 +1064,9 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
         moduleTranslation.mapValue(sectionArg, llvmVal);
       }
 
-      convertOmpOpRegions(region, "omp.section.region", builder,
-                          moduleTranslation, bodyGenStatus);
+      return convertOmpOpRegions(region, "omp.section.region", builder,
+                                 moduleTranslation)
+          .takeError();
     };
     sectionCBs.push_back(sectionCB);
   }
@@ -1003,24 +1083,27 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
   // attribute (shared, private, firstprivate, ...) of variables.
   // Currently defaults to shared.
   auto privCB = [&](InsertPointTy, InsertPointTy codeGenIP, llvm::Value &,
-                    llvm::Value &vPtr,
-                    llvm::Value *&replacementValue) -> InsertPointTy {
+                    llvm::Value &vPtr, llvm::Value *&replacementValue)
+      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
     replacementValue = &vPtr;
     return codeGenIP;
   };
 
   // TODO: Perform finalization actions for variables. This has to be
   // called for variables which have destructors/finalizers.
-  auto finiCB = [&](InsertPointTy codeGenIP) {};
+  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
 
   allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSections(
-      ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
-      sectionsOp.getNowait()));
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      moduleTranslation.getOpenMPBuilder()->createSections(
+          ompLoc, allocaIP, sectionCBs, privCB, finiCB, false,
+          sectionsOp.getNowait());
+
+  if (!afterIP)
+    return opInst.emitError(llvm::toString(afterIP.takeError()));
 
-  if (failed(bodyGenStatus))
-    return bodyGenStatus;
+  builder.restoreIP(*afterIP);
 
   // Process the reductions if required.
   return createReductionsAndCleanup(sectionsOp, builder, moduleTranslation,
@@ -1034,16 +1117,17 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  LogicalResult bodyGenStatus = success();
+
   if (!singleOp.getPrivateVars().empty() || singleOp.getPrivateSyms())
     return singleOp.emitError("unhandled clauses for translation to LLVM IR");
 
   auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
     builder.restoreIP(codegenIP);
-    convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", builder,
-                        moduleTranslation, bodyGenStatus);
+    return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
+                               builder, moduleTranslation)
+        .takeError();
   };
-  auto finiCB = [&](InsertPointTy codeGenIP) {};
+  auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
 
   // Handle copyprivate
   Operation::operand_range cpVars = singleOp.getCopyprivateVars();
@@ -1058,9 +1142,16 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
         moduleTranslation.lookupFunction(llvmFuncOp.getName()));
   }
 
-  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createSingle(
-      ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars, llvmCPFuncs));
-  return bodyGenStatus;
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      moduleTranslation.getOpenMPBuilder()->createSingle(
+          ompLoc, bodyCB, finiCB, singleOp.getNowait(), llvmCPVars,
+          llvmCPFuncs);
+
+  if (!afterIP)
+    return singleOp.emitError(llvm::toString(afterIP.takeError()));
+
+  builder.restoreIP(*afterIP);
+  return success();
 }
 
 // Convert an OpenMP Teams construct to LLVM IR using OpenMPIRBuilder
@@ -1068,7 +1159,6 @@ static LogicalResult
 convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
                 LLVM::ModuleTranslation &moduleTranslation) {
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-  LogicalResult bodyGenStatus = success();
   if (!op.getAllocatorVars().empty() || op.getReductionSyms() ||
       !op.getPrivateVars().empty() || op.getPrivateSyms())
     return op.emitError("unhandled clauses for translation to LLVM IR");
@@ -1077,8 +1167,9 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
     LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
         moduleTranslation, allocaIP);
     builder.restoreIP(codegenIP);
-    convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
-                        moduleTranslation, bodyGenStatus);
+    return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
+                               moduleTranslation)
+        .takeError();
   };
 
   llvm::Value *numTeamsLower = nullptr;
@@ -1098,9 +1189,15 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
     ifExpr = moduleTranslation.lookupValue(ifVar);
 
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTeams(
-      ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr));
-  return bodyGenStatus;
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      moduleTranslation.getOpenMPBuilder()->createTeams(
+          ompLoc, bodyCB, numTeamsLower, numTeamsUpper, threadLimit, ifExpr);
+
+  if (!afterIP)
+    return op.emitError(llvm::toString(afterIP.takeError()));
+
+  builder.restoreIP(*afterIP);
+  return success();
 }
 
 static void
@@ -1134,7 +1231,6 @@ static LogicalResult
 convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
                  LLVM::ModuleTranslation &moduleTranslation) {
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-  LogicalResult bodyGenStatus = success();
   if (taskOp.getUntiedAttr() || taskOp.getMergeableAttr() ||
       taskOp.getInReductionSyms() || taskOp.getPriority() ||
       !taskOp.getAllocateVars().empty() || !taskOp.getPrivateVars().empty() ||
@@ -1148,8 +1244,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
         moduleTranslation, allocaIP);
 
     builder.restoreIP(codegenIP);
-    convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder,
-                        moduleTranslation, bodyGenStatus);
+    return convertOmpOpRegions(taskOp.getRegion(), "omp.task.region", builder,
+                               moduleTranslation)
+        .takeError();
   };
 
   SmallVector<llvm::OpenMPIRBuilder::DependData> dds;
@@ -1159,11 +1256,17 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTask(
-      ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
-      moduleTranslation.lookupValue(taskOp.getFinal()),
-      moduleTranslation.lookupValue(taskOp.getIfExpr()), dds));
-  return bodyGenStatus;
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      moduleTranslation.getOpenMPBuilder()->createTask(
+          ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
+          moduleTranslation.lookupValue(taskOp.getFinal()),
+          moduleTranslation.lookupValue(taskOp.getIfExpr()), dds);
+
+  if (!afterIP)
+    return taskOp.emitError(llvm::toString(afterIP.takeError()));
+
+  builder.restoreIP(*afterIP);
+  return success();
 }
 
 /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder.
@@ -1171,20 +1274,27 @@ static LogicalResult
 convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
                       LLVM::ModuleTranslation &moduleTranslation) {
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-  LogicalResult bodyGenStatus = success();
-  if (!tgOp.getTaskReductionVars().empty() || !tgOp.getAllocateVars().empty()) {
+  if (!tgOp.getTaskReductionVars().empty() || !tgOp.getAllocateVars().empty())
     return tgOp.emitError("unhandled clauses for translation to LLVM IR");
-  }
+
   auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
     builder.restoreIP(codegenIP);
-    convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", builder,
-                        moduleTranslation, bodyGenStatus);
+    return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
+                               builder, moduleTranslation)
+        .takeError();
   };
+
   InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTaskgroup(
-      ompLoc, allocaIP, bodyCB));
-  return bodyGenStatus;
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
+                                                            bodyCB);
+
+  if (!afterIP)
+    return tgOp.emitError(llvm::toString(afterIP.takeError()));
+
+  builder.restoreIP(*afterIP);
+  return success();
 }
 
 static LogicalResult
@@ -1208,9 +1318,6 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
       !wsloopOp.getPrivateVars().empty() || wsloopOp.getPrivateSyms())
     return opInst.emitError("unhandled clauses for translation to LLVM IR");
 
-  // FIXME: Here any other nested wrappers (e.g. omp.simd) are skipped, so
-  // codegen for composite constructs like 'DO/FOR SIMD' will be the same as for
-  // 'DO/FOR'.
   auto loopOp = cast<omp::LoopNestOp>(wsloopOp.getWrappedLoop());
 
   llvm::ArrayRef<bool> isByRef = getIsByRef(wsloopOp.getReductionByref());
@@ -1248,6 +1355,13 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
           isByRef)))
     return failure();
 
+  // TODO: Replace this with proper composite translation support.
+  // Currently, all nested wrappers are ignored, so 'do/for simd' will be
+  // treated the same as a standalone 'do/for'. This is allowed by the spec,
+  // since it's equivalent to always using a SIMD length of 1.
+  if (failed(convertIgnoredWrappers(loopOp, wsloopOp, moduleTranslation)))
+    return failure();
+
   // Store the mapping between reduction variables and their private copies on
   // ModuleTranslation stack. It can be then recovered when translating
   // omp.reduce operations in a separate call.
@@ -1258,12 +1372,10 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
 
   // Generator of the canonical loop body.
-  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
-  // relying on captured variables.
   SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
   SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
-  LogicalResult bodyGenStatus = success();
-  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
+  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
+                     llvm::Value *iv) -> llvm::Error {
     // Make sure further conversions know about the induction variable.
     moduleTranslation.mapValue(
         loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
@@ -1274,12 +1386,13 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
     bodyInsertPoints.push_back(ip);
 
     if (loopInfos.size() != loopOp.getNumLoops() - 1)
-      return;
+      return llvm::Error::success();
 
     // Convert the body of the loop.
     builder.restoreIP(ip);
-    convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder,
-                        moduleTranslation, bodyGenStatus);
+    return convertOmpOpRegions(loopOp.getRegion(), "omp.wsloop.region", builder,
+                               moduleTranslation)
+        .takeError();
   };
 
   // Delegate actual loop construction to the OpenMP IRBuilder.
@@ -1304,12 +1417,16 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
       loc = llvm::OpenMPIRBuilder::LocationDescription(bodyInsertPoints.back());
       computeIP = loopInfos.front()->getPreheaderIP();
     }
-    loopInfos.push_back(ompBuilder->createCanonicalLoop(
-        loc, bodyGen, lowerBound, upperBound, step,
-        /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP));
 
-    if (failed(bodyGenStatus))
-      return failure();
+    llvm::Expected<llvm::CanonicalLoopInfo *> result =
+        ompBuilder->createCanonicalLoop(
+            loc, bodyGen, lowerBound, upperBound, step,
+            /*IsSigned=*/true, loopOp.getLoopInclusive(), computeIP);
+
+    if (!result)
+      return loopOp.emitError(llvm::toString(result.takeError()));
+
+    loopInfos.push_back(*result);
   }
 
   // Collapse loops. Store the insertion point because LoopInfos may get
@@ -1325,11 +1442,15 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
   std::optional<omp::ScheduleModifier> scheduleMod = wsloopOp.getScheduleMod();
   bool isSimd = wsloopOp.getScheduleSimd();
 
-  ompBuilder->applyWorkshareLoop(
-      ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(),
-      convertToScheduleKind(schedule), chunk, isSimd,
-      scheduleMod == omp::ScheduleModifier::monotonic,
-      scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered);
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
+      ompBuilder->applyWorkshareLoop(
+          ompLoc.DL, loopInfo, allocaIP, !wsloopOp.getNowait(),
+          convertToScheduleKind(schedule), chunk, isSimd,
+          scheduleMod == omp::ScheduleModifier::monotonic,
+          scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered);
+
+  if (!wsloopIP)
+    return opInst.emitError(llvm::toString(wsloopIP.takeError()));
 
   // Continue building IR after the loop. Note that the LoopInfo returned by
   // `collapseLoops` points inside the outermost loop and is intended for
@@ -1350,10 +1471,6 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
   ArrayRef<bool> isByRef = getIsByRef(opInst.getReductionByref());
   assert(isByRef.size() == opInst.getNumReductionVars());
-
-  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
-  // relying on captured variables.
-  LogicalResult bodyGenStatus = success();
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
   // Collect delayed privatization declarations
@@ -1372,7 +1489,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
       opInst.getNumReductionVars());
   SmallVector<DeferredStore> deferredStores;
 
-  auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
+  auto bodyGenCB = [&](InsertPointTy allocaIP,
+                       InsertPointTy codeGenIP) -> llvm::Error {
     // Allocate private vars
     llvm::BranchInst *allocaTerminator =
         llvm::cast<llvm::BranchInst>(allocaIP.getBlock()->getTerminator());
@@ -1418,10 +1536,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
         builder.SetInsertPoint(privAllocBlock->getTerminator());
       }
       if (failed(inlineConvertOmpRegions(allocRegion, "omp.private.alloc",
-                                         builder, moduleTranslation, &phis))) {
-        bodyGenStatus = failure();
-        return;
-      }
+                                         builder, moduleTranslation, &phis)))
+        return llvm::createStringError(
+            "failed to inline `alloc` region of an `omp.private` op in the "
+            "parallel region");
+
       assert(phis.size() == 1 && "expected one allocation to be yielded");
 
       moduleTranslation.mapValue(privateBlockArgs[i], phis[0]);
@@ -1447,7 +1566,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
             opInst, reductionArgs, builder, moduleTranslation, allocaIP,
             reductionDecls, privateReductionVariables, reductionVariableMap,
             deferredStores, isByRef)))
-      bodyGenStatus = failure();
+      return llvm::createStringError("failed reduction vars allocation");
 
     // Apply copy region for firstprivate.
     bool needsFirstprivate =
@@ -1486,10 +1605,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
       // in-place convert copy region
       builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
       if (failed(inlineConvertOmpRegions(copyRegion, "omp.private.copy",
-                                         builder, moduleTranslation))) {
-        bodyGenStatus = failure();
-        return;
-      }
+                                         builder, moduleTranslation)))
+        return llvm::createStringError(
+            "failed to inline `copy` region of an `omp.private` op in the "
+            "parallel region");
 
       // ignore unused value yielded from copy region
 
@@ -1538,7 +1657,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
       if (failed(inlineConvertOmpRegions(
               reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral",
               builder, moduleTranslation, &phis)))
-        bodyGenStatus = failure();
+        return llvm::createStringError(
+            "failed to inline `init` region of an `omp.declare_reduction` op "
+            "in the parallel region");
       assert(phis.size() == 1 &&
              "expected one value to be yielded from the "
              "reduction neutral element declaration region");
@@ -1582,9 +1703,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
 
     // ParallelOp has only one region associated with it.
     builder.restoreIP(codeGenIP);
-    auto regionBlock =
-        convertOmpOpRegions(opInst.getRegion(), "omp.par.region", builder,
-                            moduleTranslation, bodyGenStatus);
+    llvm::Expected<llvm::BasicBlock *> regionBlock = convertOmpOpRegions(
+        opInst.getRegion(), "omp.par.region", builder, moduleTranslation);
+    if (!regionBlock)
+      return regionBlock.takeError();
 
     // Process the reductions if required.
     if (opInst.getNumReductionVars() > 0) {
@@ -1597,23 +1719,25 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
                            privateReductionVariables, reductionInfos);
 
       // Move to region cont block
-      builder.SetInsertPoint(regionBlock->getTerminator());
+      builder.SetInsertPoint((*regionBlock)->getTerminator());
 
       // Generate reductions from info
       llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable();
       builder.SetInsertPoint(tempTerminator);
 
-      llvm::OpenMPIRBuilder::InsertPointTy contInsertPoint =
+      llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
           ompBuilder->createReductions(builder.saveIP(), allocaIP,
                                        reductionInfos, isByRef, false);
-      if (!contInsertPoint.getBlock()) {
-        bodyGenStatus = opInst->emitOpError() << "failed to convert reductions";
-        return;
-      }
+      if (!contInsertPoint)
+        return contInsertPoint.takeError();
+
+      if (!contInsertPoint->getBlock())
+        return llvm::createStringError("failed to convert reductions");
 
       tempTerminator->eraseFromParent();
-      builder.restoreIP(contInsertPoint);
+      builder.restoreIP(*contInsertPoint);
     }
+    return llvm::Error::success();
   };
 
   auto privCB = [](InsertPointTy allocaIP, InsertPointTy codeGenIP,
@@ -1626,7 +1750,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
 
   // TODO: Perform finalization actions for variables. This has to be
   // called for variables which have destructors/finalizers.
-  auto finiCB = [&](InsertPointTy codeGenIP) {
+  auto finiCB = [&](InsertPointTy codeGenIP) -> llvm::Error {
     InsertPointTy oldIP = builder.saveIP();
     builder.restoreIP(codeGenIP);
 
@@ -1640,7 +1764,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
     if (failed(inlineOmpRegionCleanup(
             reductionCleanupRegions, privateReductionVariables,
             moduleTranslation, builder, "omp.reduction.cleanup")))
-      bodyGenStatus = failure();
+      return llvm::createStringError(
+          "failed to inline `cleanup` region of an `omp.declare_reduction` op "
+          "in the parallel region");
 
     SmallVector<Region *> privateCleanupRegions;
     llvm::transform(privateDecls, std::back_inserter(privateCleanupRegions),
@@ -1651,9 +1777,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
     if (failed(inlineOmpRegionCleanup(
             privateCleanupRegions, llvmPrivateVars, moduleTranslation, builder,
             "omp.private.dealloc", /*shouldLoadCleanupRegionArg=*/false)))
-      bodyGenStatus = failure();
+      return llvm::createStringError("failed to inline `dealloc` region of an "
+                                     "`omp.private` op in the parallel region");
 
     builder.restoreIP(oldIP);
+    return llvm::Error::success();
   };
 
   llvm::Value *ifCond = nullptr;
@@ -1672,11 +1800,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
       findAllocaInsertPoint(builder, moduleTranslation);
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
 
-  builder.restoreIP(
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
       ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
-                                 ifCond, numThreads, pbKind, isCancellable));
+                                 ifCond, numThreads, pbKind, isCancellable);
+  if (!afterIP)
+    return opInst.emitError(llvm::toString(afterIP.takeError()));
 
-  return bodyGenStatus;
+  builder.restoreIP(*afterIP);
+  return success();
 }
 
 /// Convert Order attribute to llvm::omp::OrderKind.
@@ -1718,12 +1849,10 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
 
   // Generator of the canonical loop body.
-  // TODO: support error propagation in OpenMPIRBuilder and use it instead of
-  // relying on captured variables.
   SmallVector<llvm::CanonicalLoopInfo *> loopInfos;
   SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> bodyInsertPoints;
-  LogicalResult bodyGenStatus = success();
-  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip, llvm::Value *iv) {
+  auto bodyGen = [&](llvm::OpenMPIRBuilder::InsertPointTy ip,
+                     llvm::Value *iv) -> llvm::Error {
     // Make sure further conversions know about the induction variable.
     moduleTranslation.mapValue(
         loopOp.getRegion().front().getArgument(loopInfos.size()), iv);
@@ -1734,12 +1863,13 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
     bodyInsertPoints.push_back(ip);
 
     if (loopInfos.size() != loopOp.getNumLoops() - 1)
-      return;
+      return llvm::Error::success();
 
     // Convert the body of the loop.
     builder.restoreIP(ip);
-    convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder,
-                        moduleTranslation, bodyGenStatus);
+    return convertOmpOpRegions(loopOp.getRegion(), "omp.simd.region", builder,
+                               moduleTranslation)
+        .takeError();
   };
 
   // Delegate actual loop construction to the OpenMP IRBuilder.
@@ -1765,12 +1895,16 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
                                                        ompLoc.DL);
       computeIP = loopInfos.front()->getPreheaderIP();
     }
-    loopInfos.push_back(ompBuilder->createCanonicalLoop(
-        loc, bodyGen, lowerBound, upperBound, step,
-        /*IsSigned=*/true, /*Inclusive=*/true, computeIP));
 
-    if (failed(bodyGenStatus))
-      return failure();
+    llvm::Expected<llvm::CanonicalLoopInfo *> result =
+        ompBuilder->createCanonicalLoop(
+            loc, bodyGen, lowerBound, upperBound, step,
+            /*IsSigned=*/true, /*InclusiveStop=*/true, computeIP);
+
+    if (!result)
+      return loopOp->emitError(llvm::toString(result.takeError()));
+
+    loopInfos.push_back(*result);
   }
 
   // Collapse loops.
@@ -1921,18 +2055,17 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
       convertAtomicOrdering(opInst.getMemoryOrder());
 
   // Generate update code.
-  LogicalResult updateGenStatus = success();
-  auto updateFn = [&opInst, &moduleTranslation, &updateGenStatus](
-                      llvm::Value *atomicx,
-                      llvm::IRBuilder<> &builder) -> llvm::Value * {
+  auto updateFn =
+      [&opInst, &moduleTranslation](
+          llvm::Value *atomicx,
+          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
     Block &bb = *opInst.getRegion().begin();
     moduleTranslation.mapValue(*opInst.getRegion().args_begin(), atomicx);
     moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
-    if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
-      updateGenStatus = (opInst.emitError()
-                         << "unable to convert update operation to llvm IR");
-      return nullptr;
-    }
+    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
+      return llvm::createStringError(
+          "unable to convert update operation to llvm IR");
+
     omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
     assert(yieldop && yieldop.getResults().size() == 1 &&
            "terminator must be omp.yield op and it must have exactly one "
@@ -1943,10 +2076,16 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
   // Handle ambiguous alloca, if any.
   auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  builder.restoreIP(ompBuilder->createAtomicUpdate(
-      ompLoc, allocaIP, llvmAtomicX, llvmExpr, atomicOrdering, binop, updateFn,
-      isXBinopExpr));
-  return updateGenStatus;
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
+                                     atomicOrdering, binop, updateFn,
+                                     isXBinopExpr);
+
+  if (!afterIP)
+    return opInst.emitError(llvm::toString(afterIP.takeError()));
+
+  builder.restoreIP(*afterIP);
+  return success();
 }
 
 static LogicalResult
@@ -2007,20 +2146,19 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
   llvm::AtomicOrdering atomicOrdering =
       convertAtomicOrdering(atomicCaptureOp.getMemoryOrder());
 
-  LogicalResult updateGenStatus = success();
-  auto updateFn = [&](llvm::Value *atomicx,
-                      llvm::IRBuilder<> &builder) -> llvm::Value * {
+  auto updateFn =
+      [&](llvm::Value *atomicx,
+          llvm::IRBuilder<> &builder) -> llvm::Expected<llvm::Value *> {
     if (atomicWriteOp)
       return moduleTranslation.lookupValue(atomicWriteOp.getExpr());
     Block &bb = *atomicUpdateOp.getRegion().begin();
     moduleTranslation.mapValue(*atomicUpdateOp.getRegion().args_begin(),
                                atomicx);
     moduleTranslation.mapBlock(&bb, builder.GetInsertBlock());
-    if (failed(moduleTranslation.convertBlock(bb, true, builder))) {
-      updateGenStatus = (atomicUpdateOp.emitError()
-                         << "unable to convert update operation to llvm IR");
-      return nullptr;
-    }
+    if (failed(moduleTranslation.convertBlock(bb, true, builder)))
+      return llvm::createStringError(
+          "unable to convert update operation to llvm IR");
+
     omp::YieldOp yieldop = dyn_cast<omp::YieldOp>(bb.getTerminator());
     assert(yieldop && yieldop.getResults().size() == 1 &&
            "terminator must be omp.yield op and it must have exactly one "
@@ -2031,10 +2169,16 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
   // Handle ambiguous alloca, if any.
   auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
-  builder.restoreIP(ompBuilder->createAtomicCapture(
-      ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
-      binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr));
-  return updateGenStatus;
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
+      ompBuilder->createAtomicCapture(
+          ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
+          binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);
+
+  if (!afterIP)
+    return atomicCaptureOp.emitError(llvm::toString(afterIP.takeError()));
+
+  builder.restoreIP(*afterIP);
+  return success();
 }
 
 /// Converts an OpenMP Threadprivate operation into LLVM IR using
@@ -3019,8 +3163,8 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
       };
 
   using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
-  LogicalResult bodyGenStatus = success();
-  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType) {
+  auto bodyGenCB = [&](InsertPointTy codeGenIP, BodyGenTy bodyGenType)
+      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
     assert(isa<omp::TargetDataOp>(op) &&
            "BodyGen requested for non TargetDataOp");
     auto blockArgIface = cast<omp::BlockArgOpenMPOpInterface>(op);
@@ -3046,8 +3190,10 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
                        return info.DevicePtrInfoMap[basePointer].second;
                      });
 
-        bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
-                                                builder, moduleTranslation);
+        if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
+                                           moduleTranslation)))
+          return llvm::createStringError(
+              "failed to inline region of an `omp.target_data` op");
       }
       break;
     case BodyGenTy::DupNoPriv:
@@ -3067,8 +3213,10 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
                        mapData.BasePointers, mapData.DevicePointers);
         }
 
-        bodyGenStatus = inlineConvertOmpRegions(region, "omp.data.region",
-                                                builder, moduleTranslation);
+        if (failed(inlineConvertOmpRegions(region, "omp.data.region", builder,
+                                           moduleTranslation)))
+          return llvm::createStringError(
+              "failed to inline region of an `omp.target_data` op");
       }
       break;
     }
@@ -3078,17 +3226,21 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
   llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
       findAllocaInsertPoint(builder, moduleTranslation);
-  if (isa<omp::TargetDataOp>(op)) {
-    builder.restoreIP(ompBuilder->createTargetData(
-        ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
-        info, genMapInfoCB, nullptr, bodyGenCB));
-  } else {
-    builder.restoreIP(ompBuilder->createTargetData(
-        ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
-        info, genMapInfoCB, &RTLFn));
-  }
-
-  return bodyGenStatus;
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
+    if (isa<omp::TargetDataOp>(op))
+      return ompBuilder->createTargetData(
+          ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID),
+          ifCond, info, genMapInfoCB, nullptr, bodyGenCB);
+    return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
+                                        builder.getInt64(deviceID), ifCond,
+                                        info, genMapInfoCB, &RTLFn);
+  }();
+
+  if (!afterIP)
+    return op->emitError(llvm::toString(afterIP.takeError()));
+
+  builder.restoreIP(*afterIP);
+  return success();
 }
 
 /// Lowers the FlagsAttr which is applied to the module on the device
@@ -3320,6 +3472,8 @@ createDeviceArgumentAccessor(MapInfoData &mapData, llvm::Argument &arg,
   }
   case omp::VariableCaptureKind::This:
   case omp::VariableCaptureKind::VLAType:
+    // TODO: Consider returning error to use standard reporting for
+    // unimplemented features.
     assert(false && "Currently unsupported capture kind");
     break;
   }
@@ -3350,10 +3504,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   bool isOffloadEntry =
       isTargetDevice || !ompBuilder->Config.TargetTriples.empty();
 
-  LogicalResult bodyGenStatus = success();
   using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
-  auto bodyCB = [&](InsertPointTy allocaIP,
-                    InsertPointTy codeGenIP) -> InsertPointTy {
+  auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
+      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
     // Forward target-cpu and target-features function attributes from the
     // original function to the new outlined function.
     llvm::Function *llvmParentFn =
@@ -3396,34 +3549,35 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
         if (privatizer.getDataSharingType() ==
                 omp::DataSharingClauseType::FirstPrivate ||
             !privatizer.getDeallocRegion().empty()) {
-          opInst.emitError("Translation of omp.target from MLIR to LLVMIR "
-                           "failed because translation of firstprivate and "
-                           " private allocatables is not supported yet");
-          bodyGenStatus = failure();
-        } else {
-          moduleTranslation.mapValue(privatizer.getAllocMoldArg(),
-                                     moduleTranslation.lookupValue(privVar));
-          Region &allocRegion = privatizer.getAllocRegion();
-          SmallVector<llvm::Value *, 1> yieldedValues;
-          if (failed(inlineConvertOmpRegions(
-                  allocRegion, "omp.targetop.privatizer", builder,
-                  moduleTranslation, &yieldedValues))) {
-            opInst.emitError(
-                "failed to inline `alloc` region of an `omp.private` "
-                "op in the target region");
-            bodyGenStatus = failure();
-          } else {
-            assert(yieldedValues.size() == 1);
-            moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
-          }
-          moduleTranslation.forgetMapping(allocRegion);
-          builder.restoreIP(builder.saveIP());
+          return llvm::createStringError(
+              "Translation of omp.target from MLIR to LLVMIR "
+              "failed because translation of firstprivate and "
+              " private allocatables is not supported yet");
+        }
+        moduleTranslation.mapValue(privatizer.getAllocMoldArg(),
+                                   moduleTranslation.lookupValue(privVar));
+        Region &allocRegion = privatizer.getAllocRegion();
+        SmallVector<llvm::Value *, 1> yieldedValues;
+        if (failed(inlineConvertOmpRegions(
+                allocRegion, "omp.targetop.privatizer", builder,
+                moduleTranslation, &yieldedValues))) {
+          return llvm::createStringError(
+              "failed to inline `alloc` region of an `omp.private` "
+              "op in the target region");
         }
+        assert(yieldedValues.size() == 1);
+        moduleTranslation.mapValue(privBlockArg, yieldedValues.front());
+        moduleTranslation.forgetMapping(allocRegion);
+        builder.restoreIP(builder.saveIP());
       }
     }
-    llvm::BasicBlock *exitBlock = convertOmpOpRegions(
-        targetRegion, "omp.target", builder, moduleTranslation, bodyGenStatus);
-    builder.SetInsertPoint(exitBlock);
+
+    llvm::Expected<llvm::BasicBlock *> exitBlock = convertOmpOpRegions(
+        targetRegion, "omp.target", builder, moduleTranslation);
+    if (!exitBlock)
+      return exitBlock.takeError();
+
+    builder.SetInsertPoint(*exitBlock);
     return builder.saveIP();
   };
 
@@ -3455,7 +3609,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
 
   auto argAccessorCB = [&](llvm::Argument &arg, llvm::Value *input,
                            llvm::Value *&retVal, InsertPointTy allocaIP,
-                           InsertPointTy codeGenIP) {
+                           InsertPointTy codeGenIP)
+      -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
     // We just return the unaltered argument for the host function
     // for now, some alterations may be required in the future to
     // keep host fallback functions working identically to the device
@@ -3486,10 +3641,16 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
   buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
                   moduleTranslation, dds);
 
-  builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createTarget(
-      ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo,
-      defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB,
-      argAccessorCB, dds, targetOp.getNowait()));
+  llvm::OpenMPIRBuilder::InsertPointOrErrorTy result =
+      moduleTranslation.getOpenMPBuilder()->createTarget(
+          ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), entryInfo,
+          defaultValTeams, defaultValThreads, kernelInput, genMapInfoCB, bodyCB,
+          argAccessorCB, dds, targetOp.getNowait());
+
+  if (!result)
+    return opInst.emitError(llvm::toString(result.takeError()));
+
+  builder.restoreIP(*result);
 
   // Remap access operations to declare target reference pointers for the
   // device, essentially generating extra loadop's as necessary
@@ -3497,7 +3658,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
     handleDeclareTargetMapVar(mapData, moduleTranslation, builder,
                               llvmOutlinedFn);
 
-  return bodyGenStatus;
+  return success();
 }
 
 static LogicalResult
@@ -3618,8 +3779,13 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
   llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
 
   return llvm::TypeSwitch<Operation *, LogicalResult>(op)
-      .Case([&](omp::BarrierOp) {
-        ompBuilder->createBarrier(builder.saveIP(), llvm::omp::OMPD_barrier);
+      .Case([&](omp::BarrierOp) -> LogicalResult {
+        llvm::OpenMPIRBuilder::InsertPointOrErrorTy result =
+            ompBuilder->createBarrier(builder.saveIP(),
+                                      llvm::omp::OMPD_barrier);
+        if (!result)
+          return op->emitError(llvm::toString(result.takeError()));
+
         return success();
       })
       .Case([&](omp::TaskyieldOp) {
diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp
index 3cfcaa965f3546a..0a62628b9ad2407 100644
--- a/mlir/lib/Transforms/Utils/DialectConversion.cpp
+++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp
@@ -1382,16 +1382,21 @@ void ConversionPatternRewriterImpl::notifyOpReplaced(Operation *op,
   assert(newValues.size() == op->getNumResults());
   assert(!ignoredOps.contains(op) && "operation was already replaced");
 
+  // Check if replaced op is an unresolved materialization, i.e., an
+  // unrealized_conversion_cast op that was created by the conversion driver.
+  bool isUnresolvedMaterialization = false;
+  if (auto castOp = dyn_cast<UnrealizedConversionCastOp>(op))
+    if (unresolvedMaterializations.contains(castOp))
+      isUnresolvedMaterialization = true;
+
   // Create mappings for each of the new result values.
   for (auto [newValue, result] : llvm::zip(newValues, op->getResults())) {
     if (!newValue) {
       // This result was dropped and no replacement value was provided.
-      if (auto castOp = dyn_cast<UnrealizedConversionCastOp>(op)) {
-        if (unresolvedMaterializations.contains(castOp)) {
-          // Do not create another materializations if we are erasing a
-          // materialization.
-          continue;
-        }
+      if (isUnresolvedMaterialization) {
+        // Do not create another materialization if we are erasing a
+        // materialization.
+        continue;
       }
 
       // Materialize a replacement value "out of thin air".
@@ -1400,10 +1405,20 @@ void ConversionPatternRewriterImpl::notifyOpReplaced(Operation *op,
           result.getLoc(), /*inputs=*/ValueRange(),
           /*outputType=*/result.getType(), /*originalType=*/Type(),
           currentTypeConverter);
+    } else {
+      // Make sure that the user does not mess with unresolved materializations
+      // that were inserted by the conversion driver. We keep track of these
+      // ops in internal data structures. Erasing them must be allowed because
+      // this can happen when the user is erasing an entire block (including
+      // its body). But replacing them with another value should be forbidden
+      // to avoid problems with the `mapping`.
+      assert(!isUnresolvedMaterialization &&
+             "attempting to replace an unresolved materialization");
     }
 
-    // Remap, and check for any result type changes.
-    mapping.map(result, newValue);
+    // Remap result to replacement value.
+    if (newValue)
+      mapping.map(result, newValue);
   }
 
   appendRewrite<ReplaceOperationRewrite>(op, currentTypeConverter);
@@ -2442,11 +2457,11 @@ legalizeUnresolvedMaterialization(RewriterBase &rewriter,
     }
   }
 
-  InFlightDiagnostic diag = op->emitError()
-                            << "failed to legalize unresolved materialization "
-                               "from ("
-                            << inputOperands.getTypes() << ") to " << outputType
-                            << " that remained live after conversion";
+  InFlightDiagnostic diag =
+      op->emitError() << "failed to legalize unresolved materialization "
+                         "from ("
+                      << inputOperands.getTypes() << ") to (" << outputType
+                      << ") that remained live after conversion";
   diag.attachNote(op->getUsers().begin()->getLoc())
       << "see existing live user here: " << *op->getUsers().begin();
   return failure();
@@ -2831,11 +2846,29 @@ Value TypeConverter::materializeTargetConversion(OpBuilder &builder,
                                                  Location loc, Type resultType,
                                                  ValueRange inputs,
                                                  Type originalType) const {
+  SmallVector<Value> result = materializeTargetConversion(
+      builder, loc, TypeRange(resultType), inputs, originalType);
+  if (result.empty())
+    return nullptr;
+  assert(result.size() == 1 && "expected single result");
+  return result.front();
+}
+
+SmallVector<Value> TypeConverter::materializeTargetConversion(
+    OpBuilder &builder, Location loc, TypeRange resultTypes, ValueRange inputs,
+    Type originalType) const {
   for (const TargetMaterializationCallbackFn &fn :
-       llvm::reverse(targetMaterializations))
-    if (Value result = fn(builder, resultType, inputs, loc, originalType))
-      return result;
-  return nullptr;
+       llvm::reverse(targetMaterializations)) {
+    SmallVector<Value> result =
+        fn(builder, resultTypes, inputs, loc, originalType);
+    if (result.empty())
+      continue;
+    assert(TypeRange(ValueRange(result)) == resultTypes &&
+           "callback produced incorrect number of values or values with "
+           "incorrect types");
+    return result;
+  }
+  return {};
 }
 
 std::optional<TypeConverter::SignatureConversion>
diff --git a/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp b/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp
index 19e29d48623e04c..c208716891ef1f4 100644
--- a/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp
+++ b/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp
@@ -17,20 +17,6 @@
 using namespace llvm;
 using namespace mlir;
 
-std::optional<SmallVector<Value>>
-OneToNTypeConverter::materializeTargetConversion(OpBuilder &builder,
-                                                 Location loc,
-                                                 TypeRange resultTypes,
-                                                 Value input) const {
-  for (const OneToNMaterializationCallbackFn &fn :
-       llvm::reverse(oneToNTargetMaterializations)) {
-    if (std::optional<SmallVector<Value>> result =
-            fn(builder, resultTypes, input, loc))
-      return *result;
-  }
-  return std::nullopt;
-}
-
 TypeRange OneToNTypeMapping::getConvertedTypes(unsigned originalTypeNo) const {
   TypeRange convertedTypes = getConvertedTypes();
   if (auto mapping = getInputMapping(originalTypeNo))
@@ -268,20 +254,20 @@ Block *OneToNPatternRewriter::applySignatureConversion(
 LogicalResult
 OneToNConversionPattern::matchAndRewrite(Operation *op,
                                          PatternRewriter &rewriter) const {
-  auto *typeConverter = getTypeConverter<OneToNTypeConverter>();
+  auto *typeConverter = getTypeConverter();
 
   // Construct conversion mapping for results.
   Operation::result_type_range originalResultTypes = op->getResultTypes();
   OneToNTypeMapping resultMapping(originalResultTypes);
-  if (failed(typeConverter->computeTypeMapping(originalResultTypes,
-                                               resultMapping)))
+  if (failed(typeConverter->convertSignatureArgs(originalResultTypes,
+                                                 resultMapping)))
     return failure();
 
   // Construct conversion mapping for operands.
   Operation::operand_type_range originalOperandTypes = op->getOperandTypes();
   OneToNTypeMapping operandMapping(originalOperandTypes);
-  if (failed(typeConverter->computeTypeMapping(originalOperandTypes,
-                                               operandMapping)))
+  if (failed(typeConverter->convertSignatureArgs(originalOperandTypes,
+                                                 operandMapping)))
     return failure();
 
   // Cast operands to target types.
@@ -318,7 +304,7 @@ namespace mlir {
 // inserted by this pass are annotated with a string attribute that also
 // documents which kind of the cast (source, argument, or target).
 LogicalResult
-applyPartialOneToNConversion(Operation *op, OneToNTypeConverter &typeConverter,
+applyPartialOneToNConversion(Operation *op, TypeConverter &typeConverter,
                              const FrozenRewritePatternSet &patterns) {
 #ifndef NDEBUG
   // Remember existing unrealized casts. This data structure is only used in
@@ -370,15 +356,13 @@ applyPartialOneToNConversion(Operation *op, OneToNTypeConverter &typeConverter,
       // Target materialization.
       assert(!areOperandTypesLegal && areResultsTypesLegal &&
              operands.size() == 1 && "found unexpected target cast");
-      std::optional<SmallVector<Value>> maybeResults =
-          typeConverter.materializeTargetConversion(
-              rewriter, castOp->getLoc(), resultTypes, operands.front());
-      if (!maybeResults) {
+      materializedResults = typeConverter.materializeTargetConversion(
+          rewriter, castOp->getLoc(), resultTypes, operands.front());
+      if (materializedResults.empty()) {
         emitError(castOp->getLoc())
             << "failed to create target materialization";
         return failure();
       }
-      materializedResults = maybeResults.value();
     } else {
       // Source and argument materializations.
       assert(areOperandTypesLegal && !areResultsTypesLegal &&
@@ -427,18 +411,18 @@ class FunctionOpInterfaceSignatureConversion : public OneToNConversionPattern {
                                 const OneToNTypeMapping &resultMapping,
                                 ValueRange convertedOperands) const override {
     auto funcOp = cast<FunctionOpInterface>(op);
-    auto *typeConverter = getTypeConverter<OneToNTypeConverter>();
+    auto *typeConverter = getTypeConverter();
 
     // Construct mapping for function arguments.
     OneToNTypeMapping argumentMapping(funcOp.getArgumentTypes());
-    if (failed(typeConverter->computeTypeMapping(funcOp.getArgumentTypes(),
-                                                 argumentMapping)))
+    if (failed(typeConverter->convertSignatureArgs(funcOp.getArgumentTypes(),
+                                                   argumentMapping)))
       return failure();
 
     // Construct mapping for function results.
     OneToNTypeMapping funcResultMapping(funcOp.getResultTypes());
-    if (failed(typeConverter->computeTypeMapping(funcOp.getResultTypes(),
-                                                 funcResultMapping)))
+    if (failed(typeConverter->convertSignatureArgs(funcOp.getResultTypes(),
+                                                   funcResultMapping)))
       return failure();
 
     // Nothing to do if the op doesn't have any non-identity conversions for its
diff --git a/mlir/test/Conversion/MathToFuncs/ctlz.mlir b/mlir/test/Conversion/MathToFuncs/ctlz.mlir
index 4e262417d6a959d..b7ef0a8928912da 100644
--- a/mlir/test/Conversion/MathToFuncs/ctlz.mlir
+++ b/mlir/test/Conversion/MathToFuncs/ctlz.mlir
@@ -91,3 +91,13 @@ func.func @main(%arg0: i8) {
   func.return
 }
 
+// -----
+
+// Check that index is not converted
+
+// CHECK-LABEL: func.func @ctlz_index
+// CHECK:         math.ctlz
+func.func @ctlz_index(%arg0: index) {
+  %0 = math.ctlz %arg0 : index
+  func.return
+}
diff --git a/mlir/test/Conversion/MathToFuncs/ipowi.mlir b/mlir/test/Conversion/MathToFuncs/ipowi.mlir
index e464e9ca9564fca..2702a1e22e621de 100644
--- a/mlir/test/Conversion/MathToFuncs/ipowi.mlir
+++ b/mlir/test/Conversion/MathToFuncs/ipowi.mlir
@@ -170,3 +170,14 @@ func.func @ipowi_vec(%arg0: vector<2x3xi64>, %arg1: vector<2x3xi64>) {
   %0 = math.ipowi %arg0, %arg1 : vector<2x3xi64>
   func.return
 }
+
+// -----
+
+// Check that index is not converted
+
+// CHECK-LABEL: func.func @ipowi_index
+// CHECK:         math.ipowi
+func.func @ipowi_index(%arg0: index, %arg1: index) {
+  %0 = math.ipowi %arg0, %arg1 : index
+  func.return
+}
diff --git a/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir b/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
index ddd96bf797e6e71..e0ea18d41f66dae 100644
--- a/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
+++ b/mlir/test/Conversion/MathToROCDL/math-to-rocdl.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-math-to-rocdl -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -convert-math-to-rocdl -allow-unregistered-dialect -split-input-file | FileCheck %s
 
 module @test_module {
   // CHECK: llvm.func @__ocml_fmod_f16(f16, f16) -> f16
@@ -481,3 +481,17 @@ module @test_module {
     func.return %resultf16, %resultf32, %resultf64, %resultbf16 : f16, f32, f64, bf16
   }
 }
+
+// -----
+
+// Math operation that is not nested inside a function.
+// Ensure the conversion pass does not crash.
+
+module {
+  "test.some_op_with_region"() ({
+  ^bb0(%arg0: f64):
+    // CHECK: math.atan
+    %0 = math.atan %arg0 : f64
+    "test.possible_terminator"() : () -> ()
+  }) : () -> ()
+}
diff --git a/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir
index 55b1bc9c545a855..ec5ceae57ccb33c 100644
--- a/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir
+++ b/mlir/test/Conversion/MemRefToLLVM/expand-then-convert-to-llvm.mlir
@@ -425,8 +425,6 @@ func.func @collapse_shape_dynamic_with_non_identity_layout(
 // CHECK:           %[[SIZE1:.*]] = llvm.extractvalue %[[MEM]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
 // CHECK:           %[[SIZE2:.*]] = llvm.extractvalue %[[MEM]][3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
 // CHECK:           %[[STRIDE0:.*]] = llvm.extractvalue %[[MEM]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
-// CHECK:           %[[STRIDE0_TO_IDX:.*]] = builtin.unrealized_conversion_cast %[[STRIDE0]] : i64 to index
-// CHECK:           %[[STRIDE0:.*]] = builtin.unrealized_conversion_cast %[[STRIDE0_TO_IDX]] : index to i64
 // CHECK:           %[[FINAL_SIZE1:.*]] = llvm.mul %[[SIZE1]], %[[SIZE2]]  : i64
 // CHECK:           %[[SIZE1_TO_IDX:.*]] = builtin.unrealized_conversion_cast %[[FINAL_SIZE1]] : i64 to index
 // CHECK:           %[[FINAL_SIZE1:.*]] = builtin.unrealized_conversion_cast %[[SIZE1_TO_IDX]] : index to i64
@@ -548,23 +546,19 @@ func.func @collapse_shape_dynamic(%arg0 : memref<1x2x?xf32>) -> memref<1x?xf32>
 // CHECK:           %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
 // CHECK:           %[[SIZE2:.*]] = llvm.extractvalue %[[MEM]][3, 2] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
 // CHECK:           %[[STRIDE0:.*]] = llvm.extractvalue %[[MEM]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
-// CHECK:           %[[STRIDE1:.*]] = llvm.extractvalue %[[MEM]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
 // CHECK:           %[[C2:.*]] = llvm.mlir.constant(2 : index) : i64
 // CHECK:           %[[FINAL_SIZE1:.*]] = llvm.mul %[[SIZE2]], %[[C2]]  : i64
 // CHECK:           %[[SIZE1_TO_IDX:.*]] = builtin.unrealized_conversion_cast %[[FINAL_SIZE1]] : i64 to index
 // CHECK:           %[[FINAL_SIZE1:.*]] = builtin.unrealized_conversion_cast %[[SIZE1_TO_IDX]] : index to i64
-// CHECK:           %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
-// CHECK:           %[[MIN_STRIDE1:.*]] = llvm.intr.smin(%[[STRIDE1]], %[[C1]]) : (i64, i64) -> i64
-// CHECK:           %[[MIN_STRIDE1_TO_IDX:.*]] = builtin.unrealized_conversion_cast %[[MIN_STRIDE1]] : i64 to index
-// CHECK:           %[[MIN_STRIDE1:.*]] = builtin.unrealized_conversion_cast %[[MIN_STRIDE1_TO_IDX]] : index to i64
 // CHECK:           %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK:           %[[DESC0:.*]] = llvm.insertvalue %[[BASE_BUFFER]], %[[DESC]][0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK:           %[[DESC1:.*]] = llvm.insertvalue %[[ALIGNED_BUFFER]], %[[DESC0]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK:           %[[DESC2:.*]] = llvm.insertvalue %[[C0]], %[[DESC1]][2] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK:           %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
 // CHECK:           %[[DESC3:.*]] = llvm.insertvalue %[[C1]], %[[DESC2]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK:           %[[DESC4:.*]] = llvm.insertvalue %[[STRIDE0]], %[[DESC3]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK:           %[[DESC5:.*]] = llvm.insertvalue %[[FINAL_SIZE1]], %[[DESC4]][3, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
-// CHECK:           %[[DESC6:.*]] = llvm.insertvalue %[[MIN_STRIDE1]], %[[DESC5]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
+// CHECK:           %[[DESC6:.*]] = llvm.insertvalue %[[C1]], %[[DESC5]][4, 1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
 // CHECK:           %[[RES:.*]] = builtin.unrealized_conversion_cast %[[DESC6]] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)> to memref<1x?xf32>
 // CHECK:           return %[[RES]] : memref<1x?xf32>
 // CHECK:         }
diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
index 375e2951a037cd9..66b736c18718f3b 100644
--- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
+++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir
@@ -579,13 +579,7 @@ func.func @wgmma_f32_e5m2_e4m3(%descA : i64, %descB : i64) -> !mat32f32 {
 // -----
 
 func.func @elect_one_leader_sync() {  
-  // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "{
-  // CHECK-SAME: .reg .u32 rx;
-  // CHECK-SAME: .reg .pred px;
-  // CHECK-SAME: mov.pred $0, 0;
-  // CHECK-SAME: elect.sync rx | px, 0xFFFFFFFF;
-  // CHECK-SAME: @px mov.pred $0, 1;
-  // CHECK-SAME: "=b"  : () -> i1
+  // CHECK: %[[RES:.*]] = nvvm.elect.sync -> i1
   %cnd = nvvm.elect.sync -> i1 
   return 
 }
diff --git a/mlir/test/Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir
index 3557830e779e240..756fc5415e20f7c 100644
--- a/mlir/test/Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir
+++ b/mlir/test/Conversion/SPIRVToLLVM/control-flow-ops-to-llvm.mlir
@@ -86,6 +86,14 @@ spirv.module Logical GLSL450 {
 //===----------------------------------------------------------------------===//
 
 spirv.module Logical GLSL450 {
+  // CHECK-LABEL: @empty_loop
+  spirv.func @empty_loop() "None" {
+    // CHECK: llvm.return
+    spirv.mlir.loop {
+    }
+    spirv.Return
+  }
+
   // CHECK-LABEL: @infinite_loop
   spirv.func @infinite_loop(%count : i32) -> () "None" {
     // CHECK:   llvm.br ^[[BB1:.*]]
diff --git a/mlir/test/Dialect/Arith/int-range-interface.mlir b/mlir/test/Dialect/Arith/int-range-interface.mlir
index 4b04229e5db52f0..6d66da2fc1eb35c 100644
--- a/mlir/test/Dialect/Arith/int-range-interface.mlir
+++ b/mlir/test/Dialect/Arith/int-range-interface.mlir
@@ -178,8 +178,8 @@ func.func @div_bounds_negative(%arg0 : index) -> i1 {
 }
 
 // CHECK-LABEL: func @div_zero_undefined
-// CHECK: %[[ret:.*]] = arith.cmpi ule
-// CHECK: return %[[ret]]
+// CHECK: %[[true:.*]] = arith.constant true
+// CHECK: return %[[true]]
 func.func @div_zero_undefined(%arg0 : index) -> i1 {
     %c0 = arith.constant 0 : index
     %c1 = arith.constant 1 : index
@@ -190,6 +190,19 @@ func.func @div_zero_undefined(%arg0 : index) -> i1 {
     func.return %2 : i1
 }
 
+// CHECK-LABEL: func @div_refine_min
+// CHECK: %[[true:.*]] = arith.constant true
+// CHECK: return %[[true]]
+func.func @div_refine_min(%arg0 : index) -> i1 {
+    %c0 = arith.constant 1 : index
+    %c1 = arith.constant 2 : index
+    %c4 = arith.constant 4 : index
+    %0 = arith.andi %arg0, %c1 : index
+    %1 = arith.divui %c4, %0 : index
+    %2 = arith.cmpi uge, %1, %c0 : index
+    func.return %2 : i1
+}
+
 // CHECK-LABEL: func @ceil_divui
 // CHECK: %[[ret:.*]] = arith.cmpi eq
 // CHECK: return %[[ret]]
@@ -271,13 +284,13 @@ func.func @remui_base(%arg0 : index, %arg1 : index ) -> i1 {
 // CHECK: return %[[true]]
 func.func @remui_base_maybe_zero(%arg0 : index, %arg1 : index ) -> i1 {
     %c4 = arith.constant 4 : index
-    %c5 = arith.constant 5 : index    
+    %c5 = arith.constant 5 : index
 
     %0 = arith.minui %arg1, %c4 : index
     %1 = arith.remui %arg0, %0 : index
     %2 = arith.cmpi ult, %1, %c5 : index
     func.return %2 : i1
-}    
+}
 
 // CHECK-LABEL: func @remsi_base
 // CHECK: %[[ret:.*]] = arith.cmpi sge
diff --git a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
index ab18ce05e355d3e..bae94c1be4da908 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
@@ -78,7 +78,7 @@ func.func @static_layout_to_no_layout_cast(%m: memref<?xf32, strided<[1], offset
 // memref.cast.
 func.func @no_layout_to_dyn_layout_cast(%m: memref<?xf32>) -> memref<?xf32, strided<[1], offset: ?>> {
   %0 = bufferization.to_tensor %m : memref<?xf32>
-  // expected-error @+1 {{failed to legalize unresolved materialization from ('memref<?xf32>') to 'memref<?xf32, strided<[1], offset: ?>>' that remained live after conversion}}
+  // expected-error @+1 {{failed to legalize unresolved materialization from ('memref<?xf32>') to ('memref<?xf32, strided<[1], offset: ?>>') that remained live after conversion}}
   %1 = bufferization.to_memref %0 : memref<?xf32, strided<[1], offset: ?>>
   // expected-note @below{{see existing live user here}}
   return %1 : memref<?xf32, strided<[1], offset: ?>>
diff --git a/mlir/test/Dialect/GPU/decompose-memrefs.mlir b/mlir/test/Dialect/GPU/decompose-memrefs.mlir
index 56fc9a66b7ace71..1a1922194845170 100644
--- a/mlir/test/Dialect/GPU/decompose-memrefs.mlir
+++ b/mlir/test/Dialect/GPU/decompose-memrefs.mlir
@@ -7,8 +7,8 @@
 //       CHECK:  gpu.launch
 //  CHECK-SAME:  threads(%[[TX:.*]], %[[TY:.*]], %[[TZ:.*]]) in
 //       CHECK:  %[[IDX:.*]] = affine.apply #[[MAP]]()[%[[TX]], %[[STRIDES]]#0, %[[TY]], %[[STRIDES]]#1, %[[TZ]]]
-//       CHECK:  %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX]]], sizes: [], strides: [] : memref<f32> to memref<f32>
-//       CHECK:  memref.store %[[VAL]], %[[PTR]][] : memref<f32>
+//       CHECK:  %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX]]], sizes: [], strides: [] : memref<f32> to memref<f32, strided<[], offset: ?>>
+//       CHECK:  memref.store %[[VAL]], %[[PTR]][] : memref<f32, strided<[], offset: ?>>
 func.func @decompose_store(%arg0 : f32, %arg1 : memref<?x?x?xf32>) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -33,8 +33,8 @@ func.func @decompose_store(%arg0 : f32, %arg1 : memref<?x?x?xf32>) {
 //       CHECK:  gpu.launch
 //  CHECK-SAME:  threads(%[[TX:.*]], %[[TY:.*]], %[[TZ:.*]]) in
 //       CHECK:  %[[IDX:.*]] = affine.apply #[[MAP]]()[%[[OFFSET]], %[[TX]], %[[STRIDES]]#0, %[[TY]], %[[STRIDES]]#1, %[[TZ]], %[[STRIDES]]#2]
-//       CHECK:  %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX]]], sizes: [], strides: [] : memref<f32> to memref<f32>
-//       CHECK:  memref.store %[[VAL]], %[[PTR]][] : memref<f32>
+//       CHECK:  %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX]]], sizes: [], strides: [] : memref<f32> to memref<f32, strided<[], offset: ?>>
+//       CHECK:  memref.store %[[VAL]], %[[PTR]][] : memref<f32, strided<[], offset: ?>>
 func.func @decompose_store_strided(%arg0 : f32, %arg1 : memref<?x?x?xf32, strided<[?, ?, ?], offset: ?>>) {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
@@ -59,8 +59,8 @@ func.func @decompose_store_strided(%arg0 : f32, %arg1 : memref<?x?x?xf32, stride
 //       CHECK:  gpu.launch
 //  CHECK-SAME:  threads(%[[TX:.*]], %[[TY:.*]], %[[TZ:.*]]) in
 //       CHECK:  %[[IDX:.*]] = affine.apply #[[MAP]]()[%[[TX]], %[[STRIDES]]#0, %[[TY]], %[[STRIDES]]#1, %[[TZ]]]
-//       CHECK:  %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX]]], sizes: [], strides: [] : memref<f32> to memref<f32>
-//       CHECK:  %[[RES:.*]] = memref.load %[[PTR]][] : memref<f32>
+//       CHECK:  %[[PTR:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[IDX]]], sizes: [], strides: [] : memref<f32> to memref<f32, strided<[], offset: ?>>
+//       CHECK:  %[[RES:.*]] = memref.load %[[PTR]][] : memref<f32, strided<[], offset: ?>>
 //       CHECK:  "test.test"(%[[RES]]) : (f32) -> ()
 func.func @decompose_load(%arg0 : memref<?x?x?xf32>) {
   %c0 = arith.constant 0 : index
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index 4bc2ed140da91a6..5de007b390c51d6 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -1232,3 +1232,20 @@ func.func @transpose_buffer(%input: memref<?xf32>,
 //  CHECK-SAME:            %[[VAL_1:.*]]: memref<?xf32>) {
 //       CHECK:     linalg.transpose ins(%[[VAL_0]] : memref<?xf32>)
 //  CHECK-SAME:       outs(%[[VAL_1]] : memref<?xf32>) permutation = [0]
+
+// -----
+
+// This test checks that a linalg op has a recursive memory effect. Otherwise,
+// linalg.map without a user would be DCEd.
+func.func @recursive_effect(%arg : tensor<1xf32>) {
+  %init = arith.constant dense<0.0> : tensor<1xf32>
+  %mapped = linalg.map ins(%arg:tensor<1xf32>) outs(%init :tensor<1xf32>)
+            (%in : f32) {
+              vector.print %in : f32
+              linalg.yield %in : f32
+            }
+  func.return
+}
+
+// CHECK-LABEL: @recursive_effect
+//       CHECK: linalg.map
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index c481a723c5623c4..4b5a66f8fb5b922 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -352,6 +352,15 @@ func.func @illegal_fill_tensor_with_memref_return
 
 // -----
 
+func.func @illegal_fill_value_type(%arg0 : tensor<2x2xf32>, %arg1 : tensor<2xf32>) -> tensor<2x2xf32>
+{
+  // expected-error @+1 {{expected op with scalar input}}
+  %0 = linalg.fill ins(%arg1 : tensor<2xf32>) outs(%arg0 : tensor<2x2xf32>) -> tensor<2x2xf32>
+  return %0 : tensor<2x2xf32>
+}
+
+// -----
+
 func.func @invalid_static_matmul(%arg0: memref<2x4xf32>, %arg1: memref<3x4xf32>, %arg2: memref<2x4xf32>) {
   // expected-error @+1 {{inferred input/output operand #1 has shape's dimension #0 to be 4, but found 3}}
   linalg.matmul ins(%arg0, %arg1 : memref<2x4xf32>, memref<3x4xf32>)
diff --git a/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir
new file mode 100644
index 000000000000000..640de85cc5f12e2
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/vectorization-pad-patterns.mlir
@@ -0,0 +1,277 @@
+// RUN: mlir-opt %s -transform-interpreter -split-input-file | FileCheck %s
+
+///----------------------------------------------------------------------------------------
+/// [Pattern: PadOpVectorizationWithTransferReadPattern]
+///----------------------------------------------------------------------------------------
+// CHECK-LABEL: func @pad_and_transfer_read
+//  CHECK-SAME:     %[[ARG0:.*]]: tensor<5x6xf32>
+//   CHECK-NOT:   tensor.pad
+//   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
+//   CHECK-DAG:   %[[C5:.*]] = arith.constant 5.0
+//       CHECK:   %[[RESULT:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32>
+//       CHECK:   return %[[RESULT]]
+func.func @pad_and_transfer_read(%arg0: tensor<5x6xf32>) -> vector<7x9xf32> {
+  %c0 = arith.constant 0 : index
+  %c5 = arith.constant 5.0 : f32
+  %c6 = arith.constant 6.0 : f32
+  %0 = tensor.pad %arg0 low[0, 0] high[5, 7] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %c5 : f32
+  } : tensor<5x6xf32> to tensor<10x13xf32>
+  %1 = vector.transfer_read %0[%c0, %c0], %c6
+      : tensor<10x13xf32>, vector<7x9xf32>
+  return %1 : vector<7x9xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func">
+
+    transform.apply_patterns to %func_op {
+      transform.apply_patterns.linalg.pad_vectorization
+    } : !transform.op<"func.func">
+    transform.yield
+  }
+}
+
+// -----
+
+///----------------------------------------------------------------------------------------
+/// [Pattern: PadOpVectorizationWithTransferWritePattern]
+///----------------------------------------------------------------------------------------
+func.func private @make_vector() -> vector<7x9xf32>
+
+// CHECK-LABEL: func @pad_and_transfer_write_static_low_and_high
+//  CHECK-SAME:     %[[ARG0:.*]]: tensor<5x6xf32>
+//   CHECK-NOT:   tensor.pad
+//       CHECK:   %[[C0:.*]] = arith.constant 0 : index
+//       CHECK:   %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32>
+//       CHECK:   %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[ARG0]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<5x6xf32>
+//       CHECK:   return %[[RESULT]]
+func.func @pad_and_transfer_write_static_low_and_high(
+    %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> {
+  %c0 = arith.constant 0 : index
+  %c5 = arith.constant 5.0 : f32
+  %0 = tensor.pad %arg0 low[0, 0] high[5, 7] {
+    ^bb0(%arg2: index, %arg3: index):
+      tensor.yield %c5 : f32
+  } : tensor<5x6xf32> to tensor<10x13xf32>
+  %1 = call @make_vector() : () -> vector<7x9xf32>
+  %2 = vector.transfer_write %1, %0[%c0, %c0]
+      : vector<7x9xf32>, tensor<10x13xf32>
+  %3 = tensor.extract_slice %2[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32>
+  return %3 : tensor<5x6xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func">
+
+    transform.apply_patterns to %func_op {
+      transform.apply_patterns.linalg.pad_vectorization
+    } : !transform.op<"func.func">
+    transform.yield
+  }
+}
+
+// -----
+
+func.func private @make_vector() -> vector<7x9xf32>
+
+// CHECK-LABEL: func @pad_and_transfer_write_static_low_dynamic_high
+//  CHECK-SAME:     %[[ARG0:.*]]: tensor<?x?xf32>, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index
+//   CHECK-NOT:   tensor.pad
+//       CHECK:   %[[C0:.*]] = arith.constant 0 : index
+//       CHECK:   %[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor<?x?xf32> to tensor<?x6xf32>
+//       CHECK:   %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32>
+//       CHECK:   %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[SUB]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<?x6xf32>
+//       CHECK:   return %[[RESULT]]
+func.func @pad_and_transfer_write_static_low_dynamic_high(
+    %arg0: tensor<?x?xf32>, %size: index, %padding: index) -> tensor<?x6xf32> {
+  %c0 = arith.constant 0 : index
+  %c5 = arith.constant 5.0 : f32
+  %s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1]
+      : tensor<?x?xf32> to tensor<?x6xf32>
+  %0 = tensor.pad %s low[0, 0] high[%padding, 7] {
+    ^bb0(%arg2: index, %arg3: index):
+      tensor.yield %c5 : f32
+  } : tensor<?x6xf32> to tensor<?x13xf32>
+  %1 = call @make_vector() : () -> vector<7x9xf32>
+  %2 = vector.transfer_write %1, %0[%c0, %c0]
+      : vector<7x9xf32>, tensor<?x13xf32>
+  %3 = tensor.extract_slice %2[0, 0] [%size, 6] [1, 1] : tensor<?x13xf32> to tensor<?x6xf32>
+  return %3 : tensor<?x6xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func">
+
+    transform.apply_patterns to %func_op {
+      transform.apply_patterns.linalg.pad_vectorization
+    } : !transform.op<"func.func">
+    transform.yield
+  }
+}
+
+
+// -----
+
+///----------------------------------------------------------------------------------------
+/// [Pattern: PadOpVectorizationWithInsertSlicePattern]
+///----------------------------------------------------------------------------------------
+
+func.func private @make_vector() -> tensor<12x13xf32>
+
+// CHECK-LABEL: func @pad_and_insert_slice_source
+//  CHECK-SAME:     %[[ARG0:.*]]: tensor<5x6xf32>
+//   CHECK-NOT:   tensor.pad
+//   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
+//   CHECK-DAG:   %[[C5:.*]] = arith.constant 5.0
+//       CHECK:   %[[VEC0:.*]] = call @make_vector() : () -> tensor<12x13xf32>
+//       CHECK:   %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32>
+//       CHECK:   %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[VEC0]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<7x9xf32>, tensor<12x13xf32>
+//       CHECK:   return %[[WRITE]]
+func.func @pad_and_insert_slice_source(
+    %arg0: tensor<5x6xf32>) -> tensor<12x13xf32> {
+  %c0 = arith.constant 0 : index
+  %c5 = arith.constant 5.0 : f32
+  %0 = tensor.pad %arg0 low[0, 0] high[2, 3] {
+    ^bb0(%arg2: index, %arg3: index):
+      tensor.yield %c5 : f32
+  } : tensor<5x6xf32> to tensor<7x9xf32>
+  %1 = call @make_vector() : () -> tensor<12x13xf32>
+  %r = tensor.insert_slice %0 into %1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32>
+  return %r : tensor<12x13xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func">
+
+    transform.apply_patterns to %func_op {
+      transform.apply_patterns.linalg.pad_vectorization
+    } : !transform.op<"func.func">
+    transform.yield
+  }
+}
+
+
+// -----
+
+///----------------------------------------------------------------------------------------
+/// tensor::PadOp -> tensor::EmptyOp + linalg::FillOp/tensor::GenerateOp + tensor::InsertSliceOp
+/// [Pattern: GenericPadOpVectorizationPattern + InsertSliceVectorizePattern]
+/// TODO: Split the test into two, one for each pattern.
+///----------------------------------------------------------------------------------------
+
+func.func private @make_vector() -> tensor<12x13xf32>
+
+// Same as @pad_and_insert_slice_dest in vectorization-with-patterns.mlir, but
+// over here linalg::fill is not vectorized (patterns for linalg.fill are not
+// included here)
+// CHECK-LABEL:   func.func @pad_and_insert_slice_dest(
+// CHECK-SAME:      %[[ARG_0:.*]]: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> {
+//  CHECK-NOT:     tensor.pad
+//  CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
+//  CHECK-DAG:     %[[PAD:.*]] = arith.constant 5.000000e+00 : f32
+//  CHECK-DAG:     %[[PAD_READ:.*]] = arith.constant 0.000000e+00 : f32
+//      CHECK:     %[[EMPTY:.*]] = tensor.empty() : tensor<1x12x13xf32>
+//      CHECK:     %[[FILL:.*]] = linalg.fill ins(%[[PAD]] : f32) outs(%[[EMPTY]] : tensor<1x12x13xf32>) -> tensor<1x12x13xf32>
+//      CHECK:     %[[READ_1:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x5x6xf32>, vector<1x5x6xf32>
+//      CHECK:     %[[WRITE_1:.*]] = vector.transfer_write %[[READ_1]], %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x5x6xf32>, tensor<1x12x13xf32>
+//      CHECK:     %[[VEC:.*]] = call @make_vector() : () -> tensor<12x13xf32>
+//      CHECK:     %[[READ_2:.*]] = vector.transfer_read %[[VEC]]{{\[}}%[[C0]], %[[C0]]], %[[PAD_READ]] {in_bounds = [true, true]} : tensor<12x13xf32>, vector<12x13xf32>
+//      CHECK:     %[[RES:.*]] = vector.transfer_write %[[READ_2]], %[[WRITE_1]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<12x13xf32>, tensor<1x12x13xf32>
+//      CHECK:     return %[[RES]] : tensor<1x12x13xf32>
+
+func.func @pad_and_insert_slice_dest(
+    %arg0: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> {
+  %c5 = arith.constant 5.0 : f32
+  %0 = tensor.pad %arg0 low[0, 0, 0] high[0, 7, 7] {
+    ^bb0(%arg2: index, %arg3: index, %arg4: index):
+      tensor.yield %c5 : f32
+  } : tensor<1x5x6xf32> to tensor<1x12x13xf32>
+  %1 = call @make_vector() : () -> tensor<12x13xf32>
+  %r = tensor.insert_slice %1 into %0[0, 0, 0][1, 12, 13][1, 1, 1] : tensor<12x13xf32> into tensor<1x12x13xf32>
+  return %r : tensor<1x12x13xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func">
+
+    transform.apply_patterns to %func_op {
+      transform.apply_patterns.linalg.pad_vectorization
+    } : !transform.op<"func.func">
+    transform.yield
+  }
+}
+
+// -----
+func.func private @make_vector() -> vector<7x9xf32>
+
+// Variant of @pad_and_transfer_write_static_low_and_high
+
+// CHECK-LABEL: func @pad_and_transfer_write_static_non_zero_low_pad
+//   CHECK-NOT:   tensor.pad
+//       CHECK:   linalg.fill
+func.func @pad_and_transfer_write_static_non_zero_low_pad(
+    %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> {
+  %c0 = arith.constant 0 : index
+  %c5 = arith.constant 5.0 : f32
+  %0 = tensor.pad %arg0 low[0, 1] high[5, 6] {
+    ^bb0(%arg2: index, %arg3: index):
+      tensor.yield %c5 : f32
+  } : tensor<5x6xf32> to tensor<10x13xf32>
+  %1 = call @make_vector() : () -> vector<7x9xf32>
+  %2 = vector.transfer_write %1, %0[%c0, %c0]
+      : vector<7x9xf32>, tensor<10x13xf32>
+  %3 = tensor.extract_slice %2[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32>
+  return %3 : tensor<5x6xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func">
+
+    transform.apply_patterns to %func_op {
+      transform.apply_patterns.linalg.pad_vectorization
+    } : !transform.op<"func.func">
+    transform.yield
+  }
+}
+
+// -----
+func.func private @make_vector() -> vector<7x9xf32>
+
+// Variant of @pad_and_transfer_write_static_low_and_high
+
+// CHECK-LABEL: func @pad_and_transfer_write_static_non_zero_offset
+//   CHECK-NOT:   tensor.pad
+//       CHECK:   linalg.fill
+func.func @pad_and_transfer_write_static_non_zero_offset(
+    %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> {
+  %c0 = arith.constant 0 : index
+  %c5 = arith.constant 5.0 : f32
+  %0 = tensor.pad %arg0 low[0, 1] high[5, 6] {
+    ^bb0(%arg2: index, %arg3: index):
+      tensor.yield %c5 : f32
+  } : tensor<5x6xf32> to tensor<10x13xf32>
+  %1 = call @make_vector() : () -> vector<7x9xf32>
+  %2 = vector.transfer_write %1, %0[%c0, %c0]
+      : vector<7x9xf32>, tensor<10x13xf32>
+  %3 = tensor.extract_slice %2[0, 1] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32>
+  return %3 : tensor<5x6xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %func_op = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func">
+
+    transform.apply_patterns to %func_op {
+      transform.apply_patterns.linalg.pad_vectorization
+    } : !transform.op<"func.func">
+    transform.yield
+  }
+}
diff --git a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
index e9f8e08ca0c6b40..8fbc74ec345c6be 100644
--- a/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-unsupported.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics
+// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics | FileCheck %s
 
 func.func @conv1d_nwc_wcf_dyn_ch_dim(%input: memref<4x6x?xf32>, %filter: memref<1x?x8xf32>, %output: memref<4x2x8xf32>) {
   // expected-error @+1 {{Attempted to vectorize, but failed}}
@@ -253,3 +253,30 @@ module attributes {transform.with_named_sequence} {
     transform.yield
   }
 }
+
+// -----
+
+// With dynamically shaped source, the vectorizer infers the vector size for
+// xfer Ops from the destination tensor and, conservatively, assumes
+// out-of-bounds accesses. Out-of-bounds accesses require a pad value, but
+// that's impossible to recover in this example. Hence no vectorization.
+
+// TODO: Use diagnostics once we can vectorize tensor.insert_slice with
+// transform.structured.vectorize
+
+// CHECK-LABEL: @insert_dynamic_slice_unknown_pad
+// CHECK-NOT: vector
+// CHECK: tensor.insert_slice
+func.func @insert_dynamic_slice_unknown_pad(%arg0: tensor<1x?x3xf32>, %arg1: tensor<9x8x7x1x2x3xf32>, %size: index) -> tensor<9x8x7x1x2x3xf32> {
+  %res = tensor.insert_slice %arg0 into %arg1[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, %size, 3][1, 1, 1, 1, 1, 1] : tensor<1x?x3xf32> into tensor<9x8x7x1x2x3xf32>
+  return %res : tensor<9x8x7x1x2x3xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
diff --git a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir
index 1c6a786bfa436d9..d2fb3730a2d2b06 100644
--- a/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-with-patterns.mlir
@@ -935,163 +935,24 @@ module attributes {transform.with_named_sequence} {
   }
 }
 
-// -----
-
-// CHECK-LABEL: func @pad_and_transfer_read
-//  CHECK-SAME:     %[[ARG0:.*]]: tensor<5x6xf32>
-//   CHECK-NOT:   tensor.pad
-//   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
-//   CHECK-DAG:   %[[C5:.*]] = arith.constant 5.0
-//       CHECK:   %[[RESULT:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32>
-//       CHECK:   return %[[RESULT]]
-func.func @pad_and_transfer_read(%arg0: tensor<5x6xf32>) -> vector<7x9xf32> {
-  %c0 = arith.constant 0 : index
-  %c5 = arith.constant 5.0 : f32
-  %c6 = arith.constant 6.0 : f32
-  %0 = tensor.pad %arg0 low[0, 0] high[5, 7] {
-    ^bb0(%arg1: index, %arg2: index):
-      tensor.yield %c5 : f32
-  } : tensor<5x6xf32> to tensor<10x13xf32>
-  %1 = vector.transfer_read %0[%c0, %c0], %c6
-      : tensor<10x13xf32>, vector<7x9xf32>
-  return %1 : vector<7x9xf32>
-}
-
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
-    %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op
-    transform.yield
-  }
-}
-
-// -----
-
-func.func private @make_vector() -> vector<7x9xf32>
-
-// CHECK-LABEL: func @pad_and_transfer_write_static
-//  CHECK-SAME:     %[[ARG0:.*]]: tensor<5x6xf32>
-//   CHECK-NOT:   tensor.pad
-//       CHECK:   %[[C0:.*]] = arith.constant 0 : index
-//       CHECK:   %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32>
-//       CHECK:   %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[ARG0]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<5x6xf32>
-//       CHECK:   return %[[RESULT]]
-func.func @pad_and_transfer_write_static(
-    %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> {
-  %c0 = arith.constant 0 : index
-  %c5 = arith.constant 5.0 : f32
-  %0 = tensor.pad %arg0 low[0, 0] high[5, 7] {
-    ^bb0(%arg2: index, %arg3: index):
-      tensor.yield %c5 : f32
-  } : tensor<5x6xf32> to tensor<10x13xf32>
-  %1 = call @make_vector() : () -> vector<7x9xf32>
-  %2 = vector.transfer_write %1, %0[%c0, %c0]
-      : vector<7x9xf32>, tensor<10x13xf32>
-  %3 = tensor.extract_slice %2[0, 0] [5, 6] [1, 1] : tensor<10x13xf32> to tensor<5x6xf32>
-  return %3 : tensor<5x6xf32>
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
-    %5 = transform.structured.vectorize_children_and_apply_patterns %4  { vectorize_padding } : (!transform.any_op) -> !transform.any_op
-    transform.yield
-  }
-}
-
-
-// -----
-
-func.func private @make_vector() -> vector<7x9xf32>
-
-// CHECK-LABEL: func @pad_and_transfer_write_dynamic_static
-//  CHECK-SAME:     %[[ARG0:.*]]: tensor<?x?xf32>, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index
-//   CHECK-NOT:   tensor.pad
-//       CHECK:   %[[C0:.*]] = arith.constant 0 : index
-//       CHECK:   %[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor<?x?xf32> to tensor<?x6xf32>
-//       CHECK:   %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32>
-//       CHECK:   %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[SUB]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<?x6xf32>
-//       CHECK:   return %[[RESULT]]
-func.func @pad_and_transfer_write_dynamic_static(
-    %arg0: tensor<?x?xf32>, %size: index, %padding: index) -> tensor<?x6xf32> {
-  %c0 = arith.constant 0 : index
-  %c5 = arith.constant 5.0 : f32
-  %s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1]
-      : tensor<?x?xf32> to tensor<?x6xf32>
-  %0 = tensor.pad %s low[0, 0] high[%padding, 7] {
-    ^bb0(%arg2: index, %arg3: index):
-      tensor.yield %c5 : f32
-  } : tensor<?x6xf32> to tensor<?x13xf32>
-  %1 = call @make_vector() : () -> vector<7x9xf32>
-  %2 = vector.transfer_write %1, %0[%c0, %c0]
-      : vector<7x9xf32>, tensor<?x13xf32>
-  %3 = tensor.extract_slice %2[0, 0] [%size, 6] [1, 1] : tensor<?x13xf32> to tensor<?x6xf32>
-  return %3 : tensor<?x6xf32>
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
-    %5 = transform.structured.vectorize_children_and_apply_patterns %4 { vectorize_padding } : (!transform.any_op) -> !transform.any_op
-    transform.yield
-  }
-}
-
-
 // -----
 
 func.func private @make_vector() -> tensor<12x13xf32>
 
-// CHECK-LABEL: func @pad_and_insert_slice_source
-//  CHECK-SAME:     %[[ARG0:.*]]: tensor<5x6xf32>
-//   CHECK-NOT:   tensor.pad
-//   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
-//   CHECK-DAG:   %[[C5:.*]] = arith.constant 5.0
-//       CHECK:   %[[VEC0:.*]] = call @make_vector() : () -> tensor<12x13xf32>
-//       CHECK:   %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32>
-//       CHECK:   %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[VEC0]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<7x9xf32>, tensor<12x13xf32>
-//       CHECK:   return %[[WRITE]]
-func.func @pad_and_insert_slice_source(
-    %arg0: tensor<5x6xf32>) -> tensor<12x13xf32> {
-  %c0 = arith.constant 0 : index
-  %c5 = arith.constant 5.0 : f32
-  %0 = tensor.pad %arg0 low[0, 0] high[2, 3] {
-    ^bb0(%arg2: index, %arg3: index):
-      tensor.yield %c5 : f32
-  } : tensor<5x6xf32> to tensor<7x9xf32>
-  %1 = call @make_vector() : () -> tensor<12x13xf32>
-  %r = tensor.insert_slice %0 into %1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32>
-  return %r : tensor<12x13xf32>
-}
-
-module attributes {transform.with_named_sequence} {
-  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
-    %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-    %4 = transform.get_parent_op %3 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
-    %5 = transform.structured.vectorize_children_and_apply_patterns %4  { vectorize_padding } : (!transform.any_op) -> !transform.any_op
-    transform.yield
-  }
-}
-
-
-// -----
-
-func.func private @make_vector() -> tensor<12x13xf32>
-
-// CHECK-LABEL: func @pad_and_insert_slice_dest
-//  CHECK-SAME:     %[[ARG0:.*]]: tensor<1x5x6xf32>
-// Check the insert slice is not rewritten if the padded result is used by the destination operand.
-//   CHECK-NOT:   tensor.pad
-//       CHECK:   %[[EMPTY:.*]] = tensor.empty() : tensor<1x12x13xf32>
-//       CHECK:   %[[WRITE_1:.*]] = vector.transfer_write %{{.*}}, %[[EMPTY]]{{.*}} : vector<1x12x13xf32>, tensor<1x12x13xf32>
-//       CHECK:   %[[READ:.*]]  = vector.transfer_read %[[ARG0:.*]]{{.*}} : tensor<1x5x6xf32>, vector<1x5x6xf32>
-//       CHECK:   %[[WRITE_2:.*]] = vector.transfer_write %[[READ]], %[[WRITE_1]]{{.*}} : vector<1x5x6xf32>, tensor<1x12x13xf32>
-//       CHECK:   %[[T1:.*]] = call @make_vector() : () -> tensor<12x13xf32>
-//       CHECK:   tensor.insert_slice %[[T1]] into %[[WRITE_2]]
+// CHECK-LABEL:   func.func @pad_and_insert_slice_dest(
+// CHECK-SAME:      %[[ARG_0:.*]]: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> {
+// CHECK:           %[[C0:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[CST:.*]] = arith.constant dense<5.000000e+00> : vector<1x12x13xf32>
+// CHECK:           %[[C0_IDX:.*]] = arith.constant 0 : index
+// CHECK:           %[[PAD_VAL:.*]] = arith.constant 5.000000e+00 : f32
+// CHECK:           %[[EMPTY:.*]] = tensor.empty() : tensor<1x12x13xf32>
+// CHECK:           %[[WRITE_1:.*]] = vector.transfer_write %[[CST]], %[[EMPTY]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]] {in_bounds = [true, true, true]} : vector<1x12x13xf32>, tensor<1x12x13xf32>
+// CHECK:           %[[READ_1:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]], %[[PAD_VAL]] {in_bounds = [true, true, true]} : tensor<1x5x6xf32>, vector<1x5x6xf32>
+// CHECK:           %[[WRITE_2:.*]] = vector.transfer_write %[[READ_1]], %[[WRITE_1]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]] {in_bounds = [true, true, true]} : vector<1x5x6xf32>, tensor<1x12x13xf32>
+// CHECK:           %[[MAKE_VEC:.*]] = call @make_vector() : () -> tensor<12x13xf32>
+// CHECK:           %[[READ_2:.*]] = vector.transfer_read %[[MAKE_VEC]]{{\[}}%[[C0_IDX]], %[[C0_IDX]]], %[[C0]] {in_bounds = [true, true]} : tensor<12x13xf32>, vector<12x13xf32>
+// CHECK:           %[[RES:.*]] = vector.transfer_write %[[READ_2]], %[[WRITE_2]]{{\[}}%[[C0_IDX]], %[[C0_IDX]], %[[C0_IDX]]] {in_bounds = [true, true]} : vector<12x13xf32>, tensor<1x12x13xf32>
+// CHECK:           return %[[RES]] : tensor<1x12x13xf32>
 func.func @pad_and_insert_slice_dest(
     %arg0: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> {
   %c5 = arith.constant 5.0 : f32
@@ -2067,3 +1928,94 @@ module attributes {transform.with_named_sequence} {
     transform.yield
   }
 }
+
+// -----
+
+///----------------------------------------------------------------------------------------
+/// tensor.insert_slice
+///----------------------------------------------------------------------------------------
+
+// The pad value for xfer-read is neither needed nor available - use the default (0.0).
+
+// CHECK-LABEL: func @insert_static_slice_default_pad
+// CHECK-SAME:      %[[ARG_0:.*]]: tensor<1x2x3xf32>,
+// CHECK-SAME:      %[[ARG_1:.*]]: tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> {
+// CHECK:           %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[C0:.*]] = arith.constant 0 : index
+// CHECK:           %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x2x3xf32>, vector<1x2x3xf32>
+// CHECK:           %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[ARG_1]]{{\[}}%[[C0]], %[[C0]], %[[C0]], %[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32>
+// CHECK:           return %[[WRITE]] : tensor<9x8x7x1x2x3xf32>
+func.func @insert_static_slice_default_pad(%arg0: tensor<1x2x3xf32>, %arg1: tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32> {
+  %res = tensor.insert_slice %arg0 into %arg1[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 2, 3][1, 1, 1, 1, 1, 1] : tensor<1x2x3xf32> into tensor<9x8x7x1x2x3xf32>
+  return %res : tensor<9x8x7x1x2x3xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 { vectorize_padding } : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// Same as above, but there's a pad value available that should be used instead of the default value.
+
+// CHECK-LABEL:   func.func @insert_static_slice_non_zero_pad
+// CHECK-SAME:      %[[ARG_0:.*]]: tensor<1x2x3xf32>,
+// CHECK-SAME:      %[[PAD:.*]]: f32) -> tensor<9x8x7x1x2x3xf32> {
+// CHECK:           %[[EMPTY:.*]] = tensor.empty() : tensor<9x8x7x1x2x3xf32>
+// CHECK:           %[[BC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<9x8x7x1x2x3xf32>
+// CHECK:           %[[WRITE:.*]] = vector.transfer_write %[[BC]], %[[EMPTY]]{{.*}} {in_bounds = [true, true, true, true, true, true]} : vector<9x8x7x1x2x3xf32>, tensor<9x8x7x1x2x3xf32>
+// CHECK:           %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{.*}}, %[[PAD]] {in_bounds = [true, true, true]} : tensor<1x2x3xf32>, vector<1x2x3xf32>
+// CHECK:           %[[RES:.*]] = vector.transfer_write %[[READ]], %[[WRITE]]{{.*}} {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32>
+// CHECK:           return %[[RES]] : tensor<9x8x7x1x2x3xf32>
+func.func @insert_static_slice_non_zero_pad(%arg0: tensor<1x2x3xf32>, %pad : f32) -> tensor<9x8x7x1x2x3xf32> {
+  %init = tensor.empty() : tensor<9x8x7x1x2x3xf32>
+  %fill = linalg.fill ins(%pad : f32) outs(%init : tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32>
+  %res = tensor.insert_slice %arg0 into %fill[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 2, 3][1, 1, 1, 1, 1, 1] : tensor<1x2x3xf32> into tensor<9x8x7x1x2x3xf32>
+  return %res : tensor<9x8x7x1x2x3xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
+
+// -----
+
+// Same as above, but the source type is dynamically shaped. This means
+// that the pad value is now required and the vector dim corresponding to the
+// dynamic shape has to be inferred from the shape of the destination tensor.
+
+// CHECK-LABEL:   func.func @insert_dynamic_slice_non_zero_pad(
+// CHECK-SAME:      %[[ARG_0:.*]]: tensor<1x?x3xf32>,
+// CHECK-SAME:      %[[PAD:.*]]: f32,
+// CHECK-SAME:      %[[SIZE:.*]]: index) -> tensor<9x8x7x1x2x3xf32> {
+// CHECK:           %[[EMPTY:.*]] = tensor.empty() : tensor<9x8x7x1x2x3xf32>
+// CHECK:           %[[BC:.*]] = vector.broadcast %[[PAD]] : f32 to vector<9x8x7x1x2x3xf32>
+// CHECK:           %[[WRITE:.*]] = vector.transfer_write %[[BC]], %[[EMPTY]]{{.*}} {in_bounds = [true, true, true, true, true, true]} : vector<9x8x7x1x2x3xf32>, tensor<9x8x7x1x2x3xf32>
+// CHECK:           %[[READ:.*]] = vector.transfer_read %[[ARG_0]]{{.*}}, %[[PAD]] {in_bounds = [true, false, true]} : tensor<1x?x3xf32>, vector<1x2x3xf32>
+// CHECK:           %[[RES:.*]] = vector.transfer_write %[[READ]], %[[WRITE]]{{.*}} {in_bounds = [true, true, true]} : vector<1x2x3xf32>, tensor<9x8x7x1x2x3xf32>
+// CHECK:           return %[[RES]] : tensor<9x8x7x1x2x3xf32>
+func.func @insert_dynamic_slice_non_zero_pad(%arg0: tensor<1x?x3xf32>, %pad : f32, %size: index) -> tensor<9x8x7x1x2x3xf32> {
+  %init = tensor.empty() : tensor<9x8x7x1x2x3xf32>
+  %fill = linalg.fill ins(%pad : f32) outs(%init : tensor<9x8x7x1x2x3xf32>) -> tensor<9x8x7x1x2x3xf32>
+  %res = tensor.insert_slice %arg0 into %fill[0, 0, 0, 0, 0, 0] [1, 1, 1, 1, %size, 3][1, 1, 1, 1, 1, 1] : tensor<1x?x3xf32> into tensor<9x8x7x1x2x3xf32>
+  return %res : tensor<9x8x7x1x2x3xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+    %0 = transform.structured.match ops{["tensor.insert_slice"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+    transform.yield
+  }
+}
diff --git a/mlir/test/Dialect/MemRef/expand-ops.mlir b/mlir/test/Dialect/MemRef/expand-ops.mlir
index f958a92b751a4ab..65932b5814a668c 100644
--- a/mlir/test/Dialect/MemRef/expand-ops.mlir
+++ b/mlir/test/Dialect/MemRef/expand-ops.mlir
@@ -52,14 +52,13 @@ func.func @memref_reshape(%input: memref<*xf32>,
 // CHECK-SAME: [[SRC:%.*]]: memref<*xf32>,
 // CHECK-SAME: [[SHAPE:%.*]]: memref<3xi32>) -> memref<?x?x8xf32> {
 
-// CHECK: [[C1:%.*]] = arith.constant 1 : index
 // CHECK: [[C8:%.*]] = arith.constant 8 : index
-// CHECK: [[STRIDE_1:%.*]] = arith.muli [[C1]], [[C8]] : index
-
-// CHECK: [[C1_:%.*]] = arith.constant 1 : index
-// CHECK: [[DIM_1:%.*]] = memref.load [[SHAPE]]{{\[}}[[C1_]]] : memref<3xi32>
+// CHECK: [[C1:%.*]] = arith.constant 1 : index
+// CHECK: [[DIM_1:%.*]] = memref.load [[SHAPE]]{{\[}}[[C1]]] : memref<3xi32>
 // CHECK: [[SIZE_1:%.*]] = arith.index_cast [[DIM_1]] : i32 to index
-// CHECK: [[STRIDE_0:%.*]] = arith.muli [[STRIDE_1]], [[SIZE_1]] : index
+
+// CHECK: [[C8_:%.*]] = arith.constant 8 : index
+// CHECK: [[STRIDE_0:%.*]] = arith.muli [[C8_]], [[SIZE_1]] : index
 
 // CHECK: [[C0:%.*]] = arith.constant 0 : index
 // CHECK: [[DIM_0:%.*]] = memref.load [[SHAPE]]{{\[}}[[C0]]] : memref<3xi32>
@@ -67,5 +66,5 @@ func.func @memref_reshape(%input: memref<*xf32>,
 
 // CHECK: [[RESULT:%.*]] = memref.reinterpret_cast [[SRC]]
 // CHECK-SAME: to offset: [0], sizes: {{\[}}[[SIZE_0]], [[SIZE_1]], 8],
-// CHECK-SAME: strides: {{\[}}[[STRIDE_0]], [[STRIDE_1]], [[C1]]]
+// CHECK-SAME: strides: {{\[}}[[STRIDE_0]], 8, 1]
 // CHECK-SAME: : memref<*xf32> to memref<?x?x8xf32>
diff --git a/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir b/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir
index 8aac802ba10ae9f..647731db439c080 100644
--- a/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir
+++ b/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir
@@ -931,19 +931,15 @@ func.func @extract_aligned_pointer_as_index_of_unranked_source(%arg0: memref<*xf
 //          = min(7, 1)
 //          = 1
 //
-//   CHECK-DAG: #[[$STRIDE0_MIN_MAP:.*]] = affine_map<()[s0] -> (s0)>
-//   CHECK-DAG: #[[$SIZE0_MAP:.*]] = affine_map<()[s0, s1] -> ((s0 * s1) * 4)>
-//   CHECK-DAG: #[[$STRIDE1_MIN_MAP:.*]] = affine_map<()[s0, s1] -> (s0, s1, 42)>
+//       CHECK: #[[$SIZE0_MAP:.*]] = affine_map<()[s0, s1] -> ((s0 * s1) * 4)>
 // CHECK-LABEL: func @simplify_collapse(
 //  CHECK-SAME: %[[ARG:.*]]: memref<?x?x4x?x6x7xi32>)
 //
 //       CHECK: %[[BASE:.*]], %[[OFFSET:.*]], %[[SIZES:.*]]:6, %[[STRIDES:.*]]:6 = memref.extract_strided_metadata %[[ARG]] : memref<?x?x4x?x6x7xi32>
 //
-//   CHECK-DAG: %[[DYN_STRIDE0:.*]] = affine.min #[[$STRIDE0_MIN_MAP]]()[%[[STRIDES]]#0]
-//   CHECK-DAG: %[[DYN_SIZE1:.*]] = affine.apply #[[$SIZE0_MAP]]()[%[[SIZES]]#1, %[[SIZES]]#3]
-//   CHECK-DAG: %[[DYN_STRIDE1:.*]] = affine.min #[[$STRIDE1_MIN_MAP]]()[%[[STRIDES]]#1, %[[STRIDES]]#2]
+//       CHECK: %[[DYN_SIZE1:.*]] = affine.apply #[[$SIZE0_MAP]]()[%[[SIZES]]#1, %[[SIZES]]#3]
 //
-//       CHECK: %[[COLLAPSE_VIEW:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [0], sizes: [%[[SIZES]]#0, %[[DYN_SIZE1]], 42], strides: [%[[DYN_STRIDE0]], %[[DYN_STRIDE1]], 1]
+//       CHECK: %[[COLLAPSE_VIEW:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [0], sizes: [%[[SIZES]]#0, %[[DYN_SIZE1]], 42], strides: [%[[STRIDES]]#0, 42, 1]
 func.func @simplify_collapse(%arg : memref<?x?x4x?x6x7xi32>)
   -> memref<?x?x42xi32> {
 
@@ -1046,15 +1042,12 @@ func.func @simplify_collapse_with_dim_of_size1_and_non_1_stride
 //           We just return the first dynamic one for this group.
 //
 //
-//   CHECK-DAG: #[[$STRIDE0_MIN_MAP:.*]] = affine_map<()[s0, s1] -> (s0, s1)>
 // CHECK-LABEL: func @simplify_collapse_with_dim_of_size1_and_resulting_dyn_stride(
 //  CHECK-SAME: %[[ARG:.*]]: memref<2x3x1x1x1xi32, strided<[?, ?, ?, ?, 2]
 //
 //       CHECK: %[[BASE:.*]], %[[OFFSET:.*]], %[[SIZES:.*]]:5, %[[STRIDES:.*]]:5 = memref.extract_strided_metadata %[[ARG]] : memref<2x3x1x1x1xi32, strided<[?, ?, ?, ?, 2], offset: ?>>
 //
-//   CHECK-DAG: %[[DYN_STRIDE0:.*]] = affine.min #[[$STRIDE0_MIN_MAP]]()[%[[STRIDES]]#0, %[[STRIDES]]#1]
-//
-//       CHECK: %[[COLLAPSE_VIEW:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[OFFSET]]], sizes: [6, 1], strides: [%[[DYN_STRIDE0]], %[[STRIDES]]#2]
+//       CHECK: %[[COLLAPSE_VIEW:.*]] = memref.reinterpret_cast %[[BASE]] to offset: [%[[OFFSET]]], sizes: [6, 1], strides: [%[[STRIDES]]#1, %[[STRIDES]]#2]
 func.func @simplify_collapse_with_dim_of_size1_and_resulting_dyn_stride
     (%arg0: memref<2x3x1x1x1xi32, strided<[?, ?, ?, ?, 2], offset: ?>>)
     -> memref<6x1xi32, strided<[?, ?], offset: ?>> {
@@ -1083,8 +1076,7 @@ func.func @simplify_collapse_with_dim_of_size1_and_resulting_dyn_stride
 // Stride 2 = origStride5
 //          = 1
 //
-//   CHECK-DAG: #[[$SIZE0_MAP:.*]] = affine_map<()[s0, s1] -> ((s0 * s1) * 4)>
-//   CHECK-DAG: #[[$STRIDE0_MAP:.*]] = affine_map<()[s0] -> (s0)>
+//       CHECK: #[[$SIZE0_MAP:.*]] = affine_map<()[s0, s1] -> ((s0 * s1) * 4)>
 // CHECK-LABEL: func @extract_strided_metadata_of_collapse(
 //  CHECK-SAME: %[[ARG:.*]]: memref<?x?x4x?x6x7xi32>)
 //
@@ -1094,10 +1086,9 @@ func.func @simplify_collapse_with_dim_of_size1_and_resulting_dyn_stride
 //
 //   CHECK-DAG: %[[BASE:.*]], %[[OFFSET:.*]], %[[SIZES:.*]]:6, %[[STRIDES:.*]]:6 = memref.extract_strided_metadata %[[ARG]] : memref<?x?x4x?x6x7xi32>
 //
-//   CHECK-DAG: %[[DYN_STRIDE0:.*]] = affine.min #[[$STRIDE0_MAP]]()[%[[STRIDES]]#0]
 //   CHECK-DAG: %[[DYN_SIZE1:.*]] = affine.apply #[[$SIZE0_MAP]]()[%[[SIZES]]#1, %[[SIZES]]#3]
 //
-//       CHECK: return %[[BASE]], %[[C0]], %[[SIZES]]#0, %[[DYN_SIZE1]], %[[C42]], %[[DYN_STRIDE0]], %[[C42]], %[[C1]]
+//       CHECK: return %[[BASE]], %[[C0]], %[[SIZES]]#0, %[[DYN_SIZE1]], %[[C42]], %[[STRIDES]]#0, %[[C42]], %[[C1]]
 func.func @extract_strided_metadata_of_collapse(%arg : memref<?x?x4x?x6x7xi32>)
   -> (memref<i32>, index,
       index, index, index,
diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir
index 0f533cb95a0ca93..51c4781c9022b25 100644
--- a/mlir/test/Dialect/MemRef/invalid.mlir
+++ b/mlir/test/Dialect/MemRef/invalid.mlir
@@ -217,6 +217,15 @@ func.func @memref_reinterpret_cast_no_map_but_offset(%in: memref<?xf32>) {
 
 // -----
 
+func.func @memref_reinterpret_cast_offset_mismatch_dynamic(%in: memref<?xf32>, %offset : index) {
+  // expected-error @+1 {{expected result type with offset = dynamic instead of 0}}
+  %out = memref.reinterpret_cast %in to offset: [%offset], sizes: [10], strides: [1]
+         : memref<?xf32> to memref<10xf32>
+  return
+}
+
+// -----
+
 func.func @memref_reinterpret_cast_no_map_but_stride(%in: memref<?xf32>) {
   // expected-error @+1 {{expected result type with stride = 10 instead of 1 in dim = 0}}
   %out = memref.reinterpret_cast %in to offset: [0], sizes: [10], strides: [10]
diff --git a/mlir/test/Dialect/SCF/bufferize.mlir b/mlir/test/Dialect/SCF/bufferize.mlir
index ff1612310255a0d..53fcee692226cb7 100644
--- a/mlir/test/Dialect/SCF/bufferize.mlir
+++ b/mlir/test/Dialect/SCF/bufferize.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -scf-bufferize | FileCheck %s
+// RUN: mlir-opt %s -one-shot-bufferize="dialect-filter=scf,bufferization copy-before-write unknown-type-conversion=identity-layout-map" -split-input-file | FileCheck %s
 
 // CHECK-LABEL:   func @if(
 // CHECK-SAME:             %[[PRED:.*]]: i1,
@@ -23,15 +23,21 @@ func.func @if(%pred: i1, %true_val: tensor<?xf32>, %false_val: tensor<?xf32>) ->
   return %0 : tensor<?xf32>
 }
 
+// -----
+
 // CHECK-LABEL:   func @for(
 // CHECK-SAME:              %[[TENSOR:.*]]: tensor<f32>,
 // CHECK-SAME:              %[[LB:.*]]: index, %[[UB:.*]]: index,
 // CHECK-SAME:              %[[STEP:.*]]: index) -> tensor<f32> {
 // CHECK:           %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<f32>
-// CHECK:           %[[RESULT_MEMREF:.*]] = scf.for %[[VAL_6:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[ITER:.*]] = %[[MEMREF]]) -> (memref<f32>) {
+// Note: scf.for iter_args always bufferize to a memory write. This could be
+// optimized by analyzing the loop body.
+// CHECK:           %[[MEMREF_COPY:.*]] = memref.alloc()
+// CHECK:           memref.copy %[[MEMREF]], %[[MEMREF_COPY]]
+// CHECK:           %[[RESULT_MEMREF:.*]] = scf.for %{{.*}} = %[[LB]] to %[[UB]] step %[[STEP]] iter_args(%[[ITER:.*]] = %[[MEMREF_COPY]]) -> (memref<f32>) {
 // CHECK:             scf.yield %[[ITER]] : memref<f32>
 // CHECK:           } {some_attr}
-// CHECK:           %[[VAL_8:.*]] = bufferization.to_tensor %[[VAL_9:.*]] : memref<f32>
+// CHECK:           %[[VAL_8:.*]] = bufferization.to_tensor %[[RESULT_MEMREF]] : memref<f32>
 // CHECK:           return %[[VAL_8]] : tensor<f32>
 // CHECK:         }
 func.func @for(%arg0: tensor<f32>, %lb: index, %ub: index, %step: index) -> tensor<f32> {
@@ -41,6 +47,8 @@ func.func @for(%arg0: tensor<f32>, %lb: index, %ub: index, %step: index) -> tens
   return %ret : tensor<f32>
 }
 
+// -----
+
 // Check whether this converts at all.
 //
 // It would previously fail altogether.
@@ -57,17 +65,23 @@ func.func @if_correct_recursive_legalization_behavior(%pred: i1, %tensor: tensor
   return %0 : tensor<f32>
 }
 
+// -----
+
 // CHECK-LABEL:   func @for_correct_recursive_legalization_behavior(
 // CHECK-SAME:                                                      %[[TENSOR:.*]]: tensor<f32>,
 // CHECK-SAME:                                                      %[[INDEX:.*]]: index) -> tensor<f32> {
 // CHECK:           %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<f32>
-// CHECK:           %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[INDEX]] to %[[INDEX]] step %[[INDEX]] iter_args(%[[MEMREF_ITER:.*]] = %[[MEMREF]]) -> (memref<f32>) {
+// Note: scf.for iter_args always bufferize to a memory write. This could be
+// optimized by analyzing the loop body.
+// CHECK:           %[[MEMREF_COPY:.*]] = memref.alloc()
+// CHECK:           memref.copy %[[MEMREF]], %[[MEMREF_COPY]]
+// CHECK:           %[[RESULT:.*]] = scf.for %{{.*}} = %[[INDEX]] to %[[INDEX]] step %[[INDEX]] iter_args(%[[MEMREF_ITER:.*]] = %[[MEMREF_COPY]]) -> (memref<f32>) {
 // CHECK:             %[[TENSOR_ITER:.*]] = bufferization.to_tensor %[[MEMREF_ITER]] : memref<f32>
 // CHECK:             %[[TENSOR_MUNGED:.*]] = "test.munge_tensor"(%[[TENSOR_ITER]]) : (tensor<f32>) -> tensor<f32>
 // CHECK:             %[[MEMREF_MUNGED:.*]] = bufferization.to_memref %[[TENSOR_MUNGED]] : memref<f32>
 // CHECK:             scf.yield %[[MEMREF_MUNGED]] : memref<f32>
 // CHECK:           }
-// CHECK:           %[[TENSOR:.*]] = bufferization.to_tensor %[[RESULT:.*]] : memref<f32>
+// CHECK:           %[[TENSOR:.*]] = bufferization.to_tensor %[[RESULT]] : memref<f32>
 // CHECK:           return %[[TENSOR]] : tensor<f32>
 // CHECK:         }
 func.func @for_correct_recursive_legalization_behavior(%arg0: tensor<f32>, %index: index) -> tensor<f32> {
@@ -78,11 +92,17 @@ func.func @for_correct_recursive_legalization_behavior(%arg0: tensor<f32>, %inde
   return %ret : tensor<f32>
 }
 
+// -----
+
 // CHECK-LABEL:   func @bufferize_while(
 // CHECK-SAME: %[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64, %[[ARG2:.*]]: tensor<f32>
 // CHECK: %[[M:.*]] = bufferization.to_memref %[[ARG2]] : memref<f32>
-// CHECK: %[[RES1:.*]]:3 = scf.while (%{{.*}} = %[[ARG0]], %{{.*}} = %[[M]]) : (i64, memref<f32>) -> (i64, i64, memref<f32>)
-// CHECK: scf.condition(%{{.*}}) %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, memref<f32>
+// Note: scf.while iter_args always bufferize to a memory write. This could be
+// optimized by analyzing the loop body.
+// CHECK:           %[[MEMREF_COPY:.*]] = memref.alloc()
+// CHECK:           memref.copy %[[M]], %[[MEMREF_COPY]]
+// CHECK: %[[RES1:.*]]:3 = scf.while (%{{.*}} = %[[ARG0]], %[[ITER:.*]] = %[[MEMREF_COPY]]) : (i64, memref<f32>) -> (i64, i64, memref<f32>)
+// CHECK: scf.condition(%{{.*}}) %{{.*}}, %{{.*}}, %[[ITER]] : i64, i64, memref<f32>
 // CHECK: ^bb0(%{{.*}}: i64, %{{.*}}: i64, %{{.*}}: memref<f32>):
 // CHECK: scf.yield %{{.*}}, %{{.*}} : i64, memref<f32>
 // CHECK:  %[[RES2:.*]] = bufferization.to_tensor %[[RES1]]#2 : memref<f32>
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index 6d6bc199e601c0a..c963460e7259fb5 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -800,6 +800,43 @@ func.func @fold_extract_shapecast_to_shapecast(%arg0 : vector<3x4xf32>) -> vecto
 
 // -----
 
+// CHECK-LABEL: func @extract_no_fold_scalar_to_0d(
+//  CHECK-SAME:     %[[v:.*]]: vector<f32>)
+//       CHECK:   %[[extract:.*]] = vector.extract %[[v]][] : f32 from vector<f32>
+//       CHECK:   return %[[extract]]
+func.func @extract_no_fold_scalar_to_0d(%v: vector<f32>) -> f32 {
+  %0 = vector.extract %v[] : f32 from vector<f32>
+  return %0 : f32
+}
+
+// -----
+
+// CHECK-LABEL: func @insert_fold_same_rank(
+//  CHECK-SAME:     %[[v:.*]]: vector<2x2xf32>)
+//       CHECK:      %[[CST:.+]] = arith.constant
+//  CHECK-SAME:                    : vector<2x2xf32>
+//       CHECK-NOT:  vector.insert
+//       CHECK:   return %[[CST]]
+func.func @insert_fold_same_rank(%v: vector<2x2xf32>) -> vector<2x2xf32> {
+  %cst = arith.constant dense<0.000000e+00> : vector<2x2xf32>
+  %0 = vector.insert %cst, %v [] : vector<2x2xf32> into vector<2x2xf32>
+  return %0 : vector<2x2xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @insert_no_fold_scalar_to_0d(
+//  CHECK-SAME:     %[[v:.*]]: vector<f32>)
+//       CHECK:   %[[extract:.*]] = vector.insert %{{.*}}, %[[v]] [] : f32 into vector<f32>
+//       CHECK:   return %[[extract]]
+func.func @insert_no_fold_scalar_to_0d(%v: vector<f32>) -> vector<f32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %0 = vector.insert %cst, %v [] : f32 into vector<f32>
+  return %0 : vector<f32>
+}
+
+// -----
+
 // CHECK-LABEL: dont_fold_expand_collapse
 //       CHECK:   %[[A:.*]] = vector.shape_cast %{{.*}} : vector<1x1x64xf32> to vector<1x1x8x8xf32>
 //       CHECK:   %[[B:.*]] = vector.shape_cast %{{.*}} : vector<1x1x8x8xf32> to vector<8x8xf32>
@@ -2606,17 +2643,6 @@ func.func @rank_1_shuffle_to_interleave(%arg0: vector<6xi32>, %arg1: vector<6xi3
 
 // -----
 
-// CHECK-LABEL: func @extract_from_0d_regression(
-//  CHECK-SAME:     %[[v:.*]]: vector<f32>)
-//       CHECK:   %[[extract:.*]] = vector.extract %[[v]][] : f32 from vector<f32>
-//       CHECK:   return %[[extract]]
-func.func @extract_from_0d_regression(%v: vector<f32>) -> f32 {
-  %0 = vector.extract %v[] : f32 from vector<f32>
-  return %0 : f32
-}
-
-// -----
-
 // CHECK-LABEL: func @extract_from_0d_splat_broadcast_regression(
 //  CHECK-SAME:     %[[a:.*]]: f32, %[[b:.*]]: vector<f32>, %[[c:.*]]: vector<2xf32>)
 func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %b: vector<f32>, %c: vector<2xf32>) -> (f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>) {
diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir
index 36d04bb77e3b969..56039d04549aa53 100644
--- a/mlir/test/Dialect/Vector/invalid.mlir
+++ b/mlir/test/Dialect/Vector/invalid.mlir
@@ -1356,7 +1356,7 @@ func.func @maskedload_base_type_mismatch(%base: memref<?xf64>, %mask: vector<16x
 
 func.func @maskedload_dim_mask_mismatch(%base: memref<?xf32>, %mask: vector<15xi1>, %pass: vector<16xf32>) {
   %c0 = arith.constant 0 : index
-  // expected-error@+1 {{'vector.maskedload' op expected result dim to match mask dim}}
+  // expected-error@+1 {{'vector.maskedload' op expected result shape to match mask shape}}
   %0 = vector.maskedload %base[%c0], %mask, %pass : memref<?xf32>, vector<15xi1>, vector<16xf32> into vector<16xf32>
 }
 
@@ -1387,7 +1387,7 @@ func.func @maskedstore_base_type_mismatch(%base: memref<?xf64>, %mask: vector<16
 
 func.func @maskedstore_dim_mask_mismatch(%base: memref<?xf32>, %mask: vector<15xi1>, %value: vector<16xf32>) {
   %c0 = arith.constant 0 : index
-  // expected-error@+1 {{'vector.maskedstore' op expected valueToStore dim to match mask dim}}
+  // expected-error@+1 {{'vector.maskedstore' op expected valueToStore shape to match mask shape}}
   vector.maskedstore %base[%c0], %mask, %value : memref<?xf32>, vector<15xi1>, vector<16xf32>
 }
 
@@ -1717,6 +1717,15 @@ func.func @vector_mask_shape_mismatch(%a: vector<8xi32>, %m0: vector<16xi1>) ->
 
 // -----
 
+func.func @vector_mask_passthru_type_mismatch(%t0: tensor<f32>, %m0: vector<i1>) -> vector<f32> {
+  %ft0 = arith.constant 0.0 : f32
+  // expected-error@+1 {{'vector.mask' op operand #0 must be vector of 1-bit signless integer values, but got 'vector<i1>'}}
+  %0 = vector.mask %m0 { vector.transfer_read %t0[], %ft0 : tensor<f32>, vector<f32> } : vector<i1> -> vector<f32>
+  return %0 : vector<f32>
+}
+
+// -----
+
 // expected-note@+1 {{prior use here}}
 func.func @vector_mask_passthru_type_mismatch(%t0: tensor<?xf32>, %idx: index, %m0: vector<16xi1>, %pt0: vector<16xi32>) -> vector<16xf32> {
   %ft0 = arith.constant 0.0 : f32
diff --git a/mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned.mlir b/mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned.mlir
new file mode 100644
index 000000000000000..7ecbad7968225d0
--- /dev/null
+++ b/mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned.mlir
@@ -0,0 +1,67 @@
+// RUN: mlir-opt --test-emulate-narrow-int="arith-compute-bitwidth=1 memref-load-bitwidth=8" --cse --split-input-file %s | FileCheck %s
+
+func.func @vector_load_i2(%arg1: index, %arg2: index) -> vector<3x3xi2> {
+    %0 = memref.alloc() : memref<3x3xi2>
+    %c0 = arith.constant 0 : index
+    %c2 = arith.constant 2 : index
+    %cst = arith.constant dense<0> : vector<3x3xi2>
+    %1 = vector.load %0[%c2, %c0] : memref<3x3xi2>, vector<3xi2>
+    %2 = vector.insert %1, %cst [0] : vector<3xi2> into vector<3x3xi2>
+    return %2 : vector<3x3xi2>
+}
+
+// CHECK: func @vector_load_i2
+// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<3xi8>
+// CHECK: %[[INDEX:.+]] = arith.constant 1 : index
+// CHECK: %[[VEC:.+]] = vector.load %[[ALLOC]][%[[INDEX]]] : memref<3xi8>, vector<2xi8>
+// CHECK: %[[VEC_I2:.+]] = vector.bitcast %[[VEC]] : vector<2xi8> to vector<8xi2>
+// CHECK: %[[EXTRACT:.+]] = vector.extract_strided_slice %[[VEC_I2]] {offsets = [2], sizes = [3], strides = [1]} : vector<8xi2> to vector<3xi2>
+
+// -----
+
+func.func @vector_transfer_read_i2() -> vector<3xi2> {
+ %0 = memref.alloc() : memref<3x3xi2>
+ %c0i2 = arith.constant 0 : i2
+ %c0 = arith.constant 0 : index
+ %c2 = arith.constant 2 : index
+ %1 = vector.transfer_read %0[%c2, %c0], %c0i2 {in_bounds = [true]} : memref<3x3xi2>, vector<3xi2>
+ return %1 : vector<3xi2>
+}
+
+// CHECK: func @vector_transfer_read_i2
+// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<3xi8>
+// CHECK: %[[INDEX:.+]] = arith.constant 1 : index
+// CHECK: %[[READ:.+]] = vector.transfer_read %[[ALLOC]][%[[INDEX]]], %0 : memref<3xi8>, vector<2xi8>
+// CHECK: %[[BITCAST:.+]] = vector.bitcast %[[READ]] : vector<2xi8> to vector<8xi2>
+// CHECK: vector.extract_strided_slice %[[BITCAST]] {offsets = [2], sizes = [3], strides = [1]} : vector<8xi2> to vector<3xi2>
+
+// -----
+
+func.func @vector_cst_maskedload_i2(%passthru: vector<5xi2>) -> vector<3x5xi2> {
+    %0 = memref.alloc() : memref<3x5xi2>
+    %cst = arith.constant dense<0> : vector<3x5xi2>
+    %mask = vector.constant_mask [3] : vector<5xi1>
+    %c0 = arith.constant 0 : index
+    %c2 = arith.constant 2 : index
+    %1 = vector.maskedload %0[%c2, %c0], %mask, %passthru :
+      memref<3x5xi2>, vector<5xi1>, vector<5xi2> into vector<5xi2>
+    %2 = vector.insert %1, %cst [0] : vector<5xi2> into vector<3x5xi2>
+    return %2 : vector<3x5xi2>
+}
+
+// CHECK: func @vector_cst_maskedload_i2
+// CHECK: %[[ORIGINMASK:.+]] = vector.constant_mask [3] : vector<5xi1>
+// CHECK: %[[NEWMASK:.+]] = arith.constant dense<true> : vector<2xi1>
+// CHECK: %[[VESSEL:.+]] = arith.constant dense<0> : vector<8xi2>
+// CHECK: %[[INSERT1:.+]] = vector.insert_strided_slice %arg0, %[[VESSEL]]
+// CHECK-SAME: {offsets = [2], strides = [1]} : vector<5xi2> into vector<8xi2>
+// CHECK: %[[BITCAST1:.+]] = vector.bitcast %[[INSERT1]] : vector<8xi2> to vector<2xi8>
+// CHECK: %[[C2:.+]] = arith.constant 2 : index
+// CHECK: %[[MASKEDLOAD:.+]] = vector.maskedload %alloc[%[[C2]]], %[[NEWMASK:.+]], %[[BITCAST1]]
+// CHECK-SAME: : memref<4xi8>, vector<2xi1>, vector<2xi8> into vector<2xi8>
+// CHECK: %[[BITCAST2:.+]] = vector.bitcast %[[MASKEDLOAD]] : vector<2xi8> to vector<8xi2>
+// CHECK: %[[CST2:.+]] = arith.constant dense<false> : vector<8xi1>
+// CHECK: %[[INSERT2:.+]] = vector.insert_strided_slice %[[ORIGINMASK]], %[[CST2]]
+// CHECK-SAME: {offsets = [2], strides = [1]} : vector<5xi1> into vector<8xi1>
+// CHECK: %[[SELECT:.+]] = arith.select %[[INSERT2]], %[[BITCAST2]], %[[INSERT1]] : vector<8xi1>, vector<8xi2>
+// CHECK: vector.extract_strided_slice %[[SELECT]] {offsets = [2], sizes = [5], strides = [1]} : vector<8xi2> to vector<5xi2> 
diff --git a/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir b/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir
index e9d12b044e2c7e8..8234351302f6b56 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-drop-unit-dims-patterns.mlir
@@ -1,5 +1,9 @@
 // RUN: mlir-opt %s --transform-interpreter | FileCheck %s
 
+//-----------------------------------------------------------------------------
+// [Patterns: TransferWriteDropUnitDimsPattern, TransferReadDropUnitDimsPattern]
+//-----------------------------------------------------------------------------
+
 func.func @transfer_read_rank_reducing(
       %arg : memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>) -> vector<3x2xi8> {
     %c0 = arith.constant 0 : index
@@ -14,7 +18,29 @@ func.func @transfer_read_rank_reducing(
 //  CHECK-SAME:     memref<1x1x3x2xi8, {{.*}}> to memref<3x2xi8, {{.*}}>
 //       CHECK:   vector.transfer_read %[[SUBVIEW]]
 
-func.func @transfer_write_rank_reducing(%arg : memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>, %vec : vector<3x2xi8>) {
+func.func @transfer_read_rank_reducing_masked(
+      %arg : memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>,
+      %mask: vector<3x2xi1>) -> vector<3x2xi8> {
+    %c0 = arith.constant 0 : index
+    %cst = arith.constant 0 : i8
+    %v = vector.mask %mask {
+      vector.transfer_read %arg[%c0, %c0, %c0, %c0], %cst :
+        memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>, vector<3x2xi8>
+    } : vector<3x2xi1> -> vector<3x2xi8>
+    return %v : vector<3x2xi8>
+}
+// CHECK-LABEL: func @transfer_read_rank_reducing_masked
+//  CHECK-SAME:     %[[ARG:.+]]: memref<1x1x3x2xi8
+//  CHECK-SAME:     %[[MASK:.+]]: vector<3x2xi1>
+//       CHECK:   %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0, 0, 0] [1, 1, 3, 2] [1, 1, 1, 1]
+//  CHECK-SAME:     memref<1x1x3x2xi8, {{.*}}> to memref<3x2xi8, {{.*}}>
+//       CHECK:   vector.mask %[[MASK]]
+//  CHECK-SAME:  vector.transfer_read %[[SUBVIEW]]
+
+func.func @transfer_write_rank_reducing(
+      %arg : memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>,
+      %vec : vector<3x2xi8>) {
+
     %c0 = arith.constant 0 : index
     vector.transfer_write %vec, %arg [%c0, %c0, %c0, %c0] :
       vector<3x2xi8>, memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>
@@ -26,6 +52,26 @@ func.func @transfer_write_rank_reducing(%arg : memref<1x1x3x2xi8, strided<[6, 6,
 //  CHECK-SAME:     memref<1x1x3x2xi8, {{.*}}> to memref<3x2xi8, {{.*}}>
 //       CHECK:   vector.transfer_write %{{.*}}, %[[SUBVIEW]]
 
+func.func @transfer_write_rank_reducing_masked(
+      %arg : memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>,
+      %vec : vector<3x2xi8>,
+      %mask: vector<3x2xi1>) {
+    %c0 = arith.constant 0 : index
+    vector.mask %mask {
+      vector.transfer_write %vec, %arg [%c0, %c0, %c0, %c0] :
+        vector<3x2xi8>, memref<1x1x3x2xi8, strided<[6, 6, 2, 1], offset: ?>>
+    } : vector<3x2xi1>
+    return
+}
+// CHECK-LABEL: func @transfer_write_rank_reducing_masked
+//  CHECK-SAME:     %[[ARG:.+]]: memref<1x1x3x2xi8
+//  CHECK-SAME:     %[[VEC:.+]]: vector<3x2xi8>
+//  CHECK-SAME:     %[[MASK:.+]]: vector<3x2xi1>
+//       CHECK:   %[[SUBVIEW:.+]] = memref.subview %[[ARG]][0, 0, 0, 0] [1, 1, 3, 2] [1, 1, 1, 1]
+//  CHECK-SAME:     memref<1x1x3x2xi8, {{.*}}> to memref<3x2xi8, {{.*}}>
+//       CHECK:   vector.mask %[[MASK]]
+//  CHECK-SAME:   vector.transfer_write %{{.*}}, %[[SUBVIEW]]
+
 func.func @transfer_read_and_vector_rank_reducing(
       %arg : memref<1x1x3x2x1xf32>) -> vector<3x2x1xf32> {
     %c0 = arith.constant 0 : index
@@ -68,6 +114,22 @@ func.func @transfer_read_and_vector_rank_reducing_to_0d(
 //       CHECK:   %[[READ:.+]] = vector.transfer_read %[[SUBVIEW]]{{.*}} : memref<f32>, vector<f32>
 //       CHECK:   vector.shape_cast %[[READ]] : vector<f32> to vector<1x1x1xf32>
 
+func.func @transfer_read_and_vector_rank_reducing_to_0d_masked(
+      %arg : memref<1x1x1x1x1xf32>,
+      %mask: vector<1x1x1xi1>) -> vector<1x1x1xf32> {
+
+    %c0 = arith.constant 0 : index
+    %cst = arith.constant 0.0 : f32
+    %v = vector.mask %mask {
+      vector.transfer_read %arg[%c0, %c0, %c0, %c0, %c0], %cst
+        : memref<1x1x1x1x1xf32>, vector<1x1x1xf32>
+    } : vector<1x1x1xi1> -> vector<1x1x1xf32>
+    return %v : vector<1x1x1xf32>
+}
+// CHECK-LABEL: func @transfer_read_and_vector_rank_reducing_to_0d_masked
+//   CHECK-NOT:   vector.shape_cast
+//   CHECK-NOT:   memref.subview
+
 func.func @transfer_write_and_vector_rank_reducing_to_0d(
       %arg : memref<1x1x1x1x1xf32>,
       %vec : vector<1x1x1xf32>) {
@@ -82,6 +144,23 @@ func.func @transfer_write_and_vector_rank_reducing_to_0d(
 //       CHECK:   %[[SHCAST:.+]] = vector.shape_cast %[[VECTOR]] : vector<1x1x1xf32> to vector<f32>
 //       CHECK:   vector.transfer_write %[[SHCAST]], %[[SUBVIEW]]{{.*}} : vector<f32>, memref<f32>
 
+func.func @transfer_write_and_vector_rank_reducing_to_0d_masked(
+      %arg : memref<1x1x1x1x1xf32>,
+      %vec : vector<1x1x1xf32>,
+      %mask: vector<1x1x1xi1>) {
+
+    %c0 = arith.constant 0 : index
+    %cst = arith.constant 0.0 : f32
+    vector.mask %mask {
+      vector.transfer_write %vec, %arg[%c0, %c0, %c0, %c0, %c0] :
+        vector<1x1x1xf32>, memref<1x1x1x1x1xf32>
+    } : vector<1x1x1xi1>
+    return
+}
+// CHECK-LABEL: func @transfer_write_and_vector_rank_reducing_to_0d_masked
+//   CHECK-NOT:   vector.shape_cast
+//   CHECK-NOT:   memref.subview
+
 func.func @transfer_read_dynamic_rank_reducing(
       %arg : memref<?x1xi8, strided<[?, ?], offset: ?>>) -> vector<[16]x1xi8> {
     %c0 = arith.constant 0 : index
diff --git a/mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py b/mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py
index 75f0dc947e06817..5394d4a3272555d 100644
--- a/mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py
+++ b/mlir/test/Integration/GPU/CUDA/sm90/python/tools/matmulBuilder.py
@@ -568,9 +568,7 @@ def generate_matmul_ws(
                                 barId,
                                 predicate=consumerPrimaryThread,
                             )
-                            nvgpu.mbarrier_arrive(
-                                ir.Type.parse("!nvgpu.mbarrier.token"), mbarDONE, barId
-                            )
+                            nvgpu.mbarrier_arrive(mbarDONE, barId)
                             debug_print(
                                 "[cons] iv={}  | mbarDONE[{}] arrive [done]",
                                 iv,
@@ -589,14 +587,9 @@ def generate_matmul_ws(
                         # Step 6.3.5. Yield
                         scf.yield_([new_acc, phaseParity])
 
-                    # Step 6.3. Wait All WGMMA
-                    nvvm.WgmmaWaitGroupSyncOp(0)
-
                     with ir.InsertionPoint(scf.IfOp(consumerPrimaryThread).then_block):
                         barId = c((K // BLOCK_K) % num_stages)
-                        nvgpu.mbarrier_arrive(
-                            ir.Type.parse("!nvgpu.mbarrier.token"), mbarDONE, barId
-                        )
+                        nvgpu.mbarrier_arrive(mbarDONE, barId)
                         scf.yield_([])
 
                     # Step 6.4. Epilogue (registers --> shared memory)
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 0471e5faf845783..75ce958b43fd346 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -259,6 +259,15 @@ llvm.func @nvvm_vote(%0 : i32, %1 : i1) -> i32 {
   llvm.return %3 : i32
 }
 
+// CHECK-LABEL: @nvvm_elect_sync
+llvm.func @nvvm_elect_sync() -> i1 {
+  // CHECK: %[[RES:.*]] = call { i32, i1 } @llvm.nvvm.elect.sync(i32 -1)
+  // CHECK-NEXT: %[[PRED:.*]] = extractvalue { i32, i1 } %[[RES]], 1
+  // CHECK-NEXT: ret i1 %[[PRED]]
+  %0 = nvvm.elect.sync -> i1
+  llvm.return %0 : i1
+}
+
 // CHECK-LABEL: @nvvm_mma_mn8n8k4_row_col_f32_f32
 llvm.func @nvvm_mma_mn8n8k4_row_col_f32_f32(%a0 : vector<2xf16>, %a1 : vector<2xf16>,
                     %b0 : vector<2xf16>, %b1 : vector<2xf16>,
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction.mlir b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
index 6d74a925b87b5c9..11c8559044be025 100644
--- a/mlir/test/Target/LLVMIR/openmp-reduction.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-reduction.mlir
@@ -586,3 +586,83 @@ llvm.func @parallel_nested_workshare_reduction(%ub : i64) {
 // Reduction function.
 // CHECK: define internal void @[[REDFUNC]]
 // CHECK: add i32
+
+// -----
+
+omp.declare_reduction @add_f32 : f32
+init {
+^bb0(%arg: f32):
+  %0 = llvm.mlir.constant(0.0 : f32) : f32
+  omp.yield (%0 : f32)
+}
+combiner {
+^bb1(%arg0: f32, %arg1: f32):
+  %1 = llvm.fadd %arg0, %arg1 : f32
+  omp.yield (%1 : f32)
+}
+atomic {
+^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr):
+  %2 = llvm.load %arg3 : !llvm.ptr -> f32
+  llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32
+  omp.yield
+}
+
+// CHECK-LABEL: @wsloop_simd_reduction
+llvm.func @wsloop_simd_reduction(%lb : i64, %ub : i64, %step : i64) {
+  %c1 = llvm.mlir.constant(1 : i32) : i32
+  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
+  omp.parallel {
+    omp.wsloop reduction(@add_f32 %0 -> %prv1 : !llvm.ptr) {
+      omp.simd reduction(@add_f32 %prv1 -> %prv2 : !llvm.ptr) {
+        omp.loop_nest (%iv) : i64 = (%lb) to (%ub) step (%step) {
+          %1 = llvm.mlir.constant(2.0 : f32) : f32
+          %2 = llvm.load %prv2 : !llvm.ptr -> f32
+          %3 = llvm.fadd %1, %2 : f32
+          llvm.store %3, %prv2 : f32, !llvm.ptr
+          omp.yield
+        }
+      } {omp.composite}
+    } {omp.composite}
+    omp.terminator
+  }
+  llvm.return
+}
+
+// Same checks as for wsloop reduction, because currently omp.simd is ignored in
+// a composite 'do/for simd' construct.
+// Call to the outlined function.
+// CHECK: call void {{.*}} @__kmpc_fork_call
+// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Outlined function.
+// CHECK: define internal void @[[OUTLINED]]
+
+// Private reduction variable and its initialization.
+// CHECK: %[[PRIVATE:.+]] = alloca float
+// CHECK: store float 0.000000e+00, ptr %[[PRIVATE]]
+
+// Call to the reduction function.
+// CHECK: call i32 @__kmpc_reduce
+// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]]
+
+// Atomic reduction.
+// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
+// CHECK: atomicrmw fadd ptr %{{.*}}, float %[[PARTIAL]]
+
+// Non-atomic reduction:
+// CHECK: fadd float
+// CHECK: call void @__kmpc_end_reduce
+// CHECK: br label %[[FINALIZE:.+]]
+
+// CHECK: [[FINALIZE]]:
+// CHECK: call void @__kmpc_barrier
+
+// Update of the private variable using the reduction region
+// (the body block currently comes after all the other blocks).
+// CHECK: %[[PARTIAL:.+]] = load float, ptr %[[PRIVATE]]
+// CHECK: %[[UPDATED:.+]] = fadd float 2.000000e+00, %[[PARTIAL]]
+// CHECK: store float %[[UPDATED]], ptr %[[PRIVATE]]
+
+// Reduction function.
+// CHECK: define internal void @[[REDFUNC]]
+// CHECK: fadd float
diff --git a/mlir/test/Transforms/test-legalize-erased-op-with-uses.mlir b/mlir/test/Transforms/test-legalize-erased-op-with-uses.mlir
index 6e8f0162e505d06..031442b0ee2daf0 100644
--- a/mlir/test/Transforms/test-legalize-erased-op-with-uses.mlir
+++ b/mlir/test/Transforms/test-legalize-erased-op-with-uses.mlir
@@ -3,7 +3,7 @@
 // Test that an error is emitted when an operation is marked as "erased", but
 // has users that live across the conversion.
 func.func @remove_all_ops(%arg0: i32) -> i32 {
-  // expected-error@below {{failed to legalize unresolved materialization from () to 'i32' that remained live after conversion}}
+  // expected-error@below {{failed to legalize unresolved materialization from () to ('i32') that remained live after conversion}}
   %0 = "test.illegal_op_a"() : () -> i32
   // expected-note@below {{see existing live user here}}
   return %0 : i32
diff --git a/mlir/test/Transforms/test-legalize-type-conversion.mlir b/mlir/test/Transforms/test-legalize-type-conversion.mlir
index f130adff42f8cdd..db8bd0f6378d29b 100644
--- a/mlir/test/Transforms/test-legalize-type-conversion.mlir
+++ b/mlir/test/Transforms/test-legalize-type-conversion.mlir
@@ -2,7 +2,7 @@
 
 
 func.func @test_invalid_arg_materialization(
-  // expected-error@below {{failed to legalize unresolved materialization from () to 'i16' that remained live after conversion}}
+  // expected-error@below {{failed to legalize unresolved materialization from () to ('i16') that remained live after conversion}}
   %arg0: i16) {
   // expected-note@below{{see existing live user here}}
   "foo.return"(%arg0) : (i16) -> ()
@@ -21,7 +21,7 @@ func.func @test_valid_arg_materialization(%arg0: i64) {
 // -----
 
 func.func @test_invalid_result_materialization() {
-  // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f16' that remained live after conversion}}
+  // expected-error@below {{failed to legalize unresolved materialization from ('f64') to ('f16') that remained live after conversion}}
   %result = "test.type_producer"() : () -> f16
   // expected-note@below{{see existing live user here}}
   "foo.return"(%result) : (f16) -> ()
@@ -30,7 +30,7 @@ func.func @test_invalid_result_materialization() {
 // -----
 
 func.func @test_invalid_result_materialization() {
-  // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f16' that remained live after conversion}}
+  // expected-error@below {{failed to legalize unresolved materialization from ('f64') to ('f16') that remained live after conversion}}
   %result = "test.type_producer"() : () -> f16
   // expected-note@below{{see existing live user here}}
   "foo.return"(%result) : (f16) -> ()
@@ -50,7 +50,7 @@ func.func @test_transitive_use_materialization() {
 // -----
 
 func.func @test_transitive_use_invalid_materialization() {
-  // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f16' that remained live after conversion}}
+  // expected-error@below {{failed to legalize unresolved materialization from ('f64') to ('f16') that remained live after conversion}}
   %result = "test.another_type_producer"() : () -> f16
   // expected-note@below{{see existing live user here}}
   "foo.return"(%result) : (f16) -> ()
@@ -102,7 +102,7 @@ func.func @test_block_argument_not_converted() {
 // Make sure argument type changes aren't implicitly forwarded.
 func.func @test_signature_conversion_no_converter() {
   "test.signature_conversion_no_converter"() ({
-  // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f32' that remained live after conversion}}
+  // expected-error@below {{failed to legalize unresolved materialization from ('f64') to ('f32') that remained live after conversion}}
   ^bb0(%arg0: f32):
     "test.type_consumer"(%arg0) : (f32) -> ()
     // expected-note@below{{see existing live user here}}
diff --git a/mlir/test/lib/Conversion/OneToNTypeConversion/TestOneToNTypeConversionPass.cpp b/mlir/test/lib/Conversion/OneToNTypeConversion/TestOneToNTypeConversionPass.cpp
index 5c03ac12d1e58ce..b18dfd8bb22cb15 100644
--- a/mlir/test/lib/Conversion/OneToNTypeConversion/TestOneToNTypeConversionPass.cpp
+++ b/mlir/test/lib/Conversion/OneToNTypeConversion/TestOneToNTypeConversionPass.cpp
@@ -147,9 +147,14 @@ populateDecomposeTuplesTestPatterns(const TypeConverter &typeConverter,
 ///
 /// This function has been copied (with small adaptions) from
 /// TestDecomposeCallGraphTypes.cpp.
-static std::optional<SmallVector<Value>>
-buildGetTupleElementOps(OpBuilder &builder, TypeRange resultTypes, Value input,
-                        Location loc) {
+static SmallVector<Value> buildGetTupleElementOps(OpBuilder &builder,
+                                                  TypeRange resultTypes,
+                                                  ValueRange inputs,
+                                                  Location loc) {
+  if (inputs.size() != 1)
+    return {};
+  Value input = inputs.front();
+
   TupleType inputType = dyn_cast<TupleType>(input.getType());
   if (!inputType)
     return {};
@@ -222,7 +227,7 @@ void TestOneToNTypeConversionPass::runOnOperation() {
   auto *context = &getContext();
 
   // Assemble type converter.
-  OneToNTypeConverter typeConverter;
+  TypeConverter typeConverter;
 
   typeConverter.addConversion([](Type type) { return type; });
   typeConverter.addConversion(
@@ -234,6 +239,11 @@ void TestOneToNTypeConversionPass::runOnOperation() {
   typeConverter.addArgumentMaterialization(buildMakeTupleOp);
   typeConverter.addSourceMaterialization(buildMakeTupleOp);
   typeConverter.addTargetMaterialization(buildGetTupleElementOps);
+  // Test the other target materialization variant that takes the original type
+  // as an additional argument. This materialization function always fails.
+  typeConverter.addTargetMaterialization(
+      [](OpBuilder &builder, TypeRange resultTypes, ValueRange inputs,
+         Location loc, Type originalType) -> SmallVector<Value> { return {}; });
 
   // Assemble patterns.
   RewritePatternSet patterns(context);
diff --git a/mlir/test/lib/Pass/CMakeLists.txt b/mlir/test/lib/Pass/CMakeLists.txt
index b190f054e50bd1c..f489b7e51e5038a 100644
--- a/mlir/test/lib/Pass/CMakeLists.txt
+++ b/mlir/test/lib/Pass/CMakeLists.txt
@@ -3,6 +3,7 @@ get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
 add_mlir_library(MLIRTestPass
   TestDynamicPipeline.cpp
   TestPassManager.cpp
+  TestSPIRVCPURunnerPipeline.cpp
 
   EXCLUDE_FROM_LIBMLIR
 
diff --git a/mlir/test/lib/Pass/TestSPIRVCPURunnerPipeline.cpp b/mlir/test/lib/Pass/TestSPIRVCPURunnerPipeline.cpp
new file mode 100644
index 000000000000000..ded0d22c31307e9
--- /dev/null
+++ b/mlir/test/lib/Pass/TestSPIRVCPURunnerPipeline.cpp
@@ -0,0 +1,47 @@
+//===------------------ TestSPIRVCPURunnerPipeline.cpp --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements a pipeline for use by mlir-spirv-cpu-runner tests.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h"
+#include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.h"
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
+#include "mlir/Dialect/SPIRV/Transforms/Passes.h"
+#include "mlir/Pass/PassManager.h"
+
+using namespace mlir;
+
+namespace {
+
+void buildTestSPIRVCPURunnerPipeline(OpPassManager &passManager) {
+  passManager.addPass(createGpuKernelOutliningPass());
+  passManager.addPass(createConvertGPUToSPIRVPass(/*mapMemorySpace=*/true));
+
+  OpPassManager &nestedPM = passManager.nest<spirv::ModuleOp>();
+  nestedPM.addPass(spirv::createSPIRVLowerABIAttributesPass());
+  nestedPM.addPass(spirv::createSPIRVUpdateVCEPass());
+  passManager.addPass(createLowerHostCodeToLLVMPass());
+  passManager.addPass(createConvertSPIRVToLLVMPass());
+}
+
+} // namespace
+
+namespace mlir {
+namespace test {
+void registerTestSPIRVCPURunnerPipeline() {
+  PassPipelineRegistration<>(
+      "test-spirv-cpu-runner-pipeline",
+      "Runs a series of passes for lowering SPIR-V-dialect MLIR to "
+      "LLVM-dialect MLIR intended for mlir-spirv-cpu-runner.",
+      buildTestSPIRVCPURunnerPipeline);
+}
+} // namespace test
+} // namespace mlir
diff --git a/mlir/test/mlir-spirv-cpu-runner/double.mlir b/mlir/test/mlir-spirv-cpu-runner/double.mlir
index cd551ffb1bd0623..35557ba1e94c003 100644
--- a/mlir/test/mlir-spirv-cpu-runner/double.mlir
+++ b/mlir/test/mlir-spirv-cpu-runner/double.mlir
@@ -1,4 +1,5 @@
-// RUN: mlir-spirv-cpu-runner %s -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_test_spirv_cpu_runner_c_wrappers \
+// RUN: mlir-opt %s -test-spirv-cpu-runner-pipeline \
+// RUN: | mlir-spirv-cpu-runner - -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_test_spirv_cpu_runner_c_wrappers \
 // RUN: | FileCheck %s
 
 // CHECK: [8,  8,  8,  8,  8,  8]
diff --git a/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir b/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir
index 119e973e45e4a7b..75675a69a675833 100644
--- a/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir
+++ b/mlir/test/mlir-spirv-cpu-runner/simple_add.mlir
@@ -1,4 +1,5 @@
-// RUN: mlir-spirv-cpu-runner %s -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_test_spirv_cpu_runner_c_wrappers \
+// RUN: mlir-opt %s -test-spirv-cpu-runner-pipeline \
+// RUN: | mlir-spirv-cpu-runner - -e main --entry-point-result=void --shared-libs=%mlir_runner_utils,%mlir_test_spirv_cpu_runner_c_wrappers \
 // RUN: | FileCheck %s
 
 // CHECK: data =
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index 36b142484bb04a6..002c3900056dee1 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -142,6 +142,7 @@ void registerTestSCFWhileOpBuilderPass();
 void registerTestSCFWrapInZeroTripCheckPasses();
 void registerTestShapeMappingPass();
 void registerTestSliceAnalysisPass();
+void registerTestSPIRVCPURunnerPipeline();
 void registerTestSPIRVFuncSignatureConversion();
 void registerTestSPIRVVectorUnrolling();
 void registerTestTensorCopyInsertionPass();
@@ -278,6 +279,7 @@ void registerTestPasses() {
   mlir::test::registerTestSCFWrapInZeroTripCheckPasses();
   mlir::test::registerTestShapeMappingPass();
   mlir::test::registerTestSliceAnalysisPass();
+  mlir::test::registerTestSPIRVCPURunnerPipeline();
   mlir::test::registerTestSPIRVFuncSignatureConversion();
   mlir::test::registerTestSPIRVVectorUnrolling();
   mlir::test::registerTestTensorCopyInsertionPass();
diff --git a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp
index 7e0b51cac806213..22ad1024db4a0b6 100644
--- a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp
+++ b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp
@@ -12,18 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
-#include "mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h"
-#include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
-#include "mlir/Dialect/SPIRV/IR/SPIRVOps.h"
-#include "mlir/Dialect/SPIRV/Transforms/Passes.h"
 #include "mlir/ExecutionEngine/JitRunner.h"
 #include "mlir/ExecutionEngine/OptUtils.h"
 #include "mlir/Pass/Pass.h"
@@ -75,23 +69,6 @@ convertMLIRModule(Operation *op, llvm::LLVMContext &context) {
   return mainModule;
 }
 
-static LogicalResult runMLIRPasses(Operation *module,
-                                   JitRunnerOptions &options) {
-  PassManager passManager(module->getContext(),
-                          module->getName().getStringRef());
-  if (failed(applyPassManagerCLOptions(passManager)))
-    return failure();
-  passManager.addPass(createGpuKernelOutliningPass());
-  passManager.addPass(createConvertGPUToSPIRVPass(/*mapMemorySpace=*/true));
-
-  OpPassManager &nestedPM = passManager.nest<spirv::ModuleOp>();
-  nestedPM.addPass(spirv::createSPIRVLowerABIAttributesPass());
-  nestedPM.addPass(spirv::createSPIRVUpdateVCEPass());
-  passManager.addPass(createLowerHostCodeToLLVMPass());
-  passManager.addPass(createConvertSPIRVToLLVMPass());
-  return passManager.run(module);
-}
-
 int main(int argc, char **argv) {
   llvm::InitLLVM y(argc, argv);
 
@@ -99,7 +76,6 @@ int main(int argc, char **argv) {
   llvm::InitializeNativeTargetAsmPrinter();
 
   mlir::JitRunnerConfig jitRunnerConfig;
-  jitRunnerConfig.mlirTransformer = runMLIRPasses;
   jitRunnerConfig.llvmModuleBuilder = convertMLIRModule;
 
   mlir::DialectRegistry registry;
diff --git a/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp b/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp
index 411a98a48bfb28b..525c8d6d3e89bc9 100644
--- a/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp
+++ b/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp
@@ -76,7 +76,7 @@ static IndicesTy getOverloadableTypeIdxs(const Record &record,
     case llvm::MVT::iAny:
     case llvm::MVT::fAny:
     case llvm::MVT::Any:
-    case llvm::MVT::iPTRAny:
+    case llvm::MVT::pAny:
     case llvm::MVT::vAny:
       overloadedOps.set(r.index());
       break;
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index f0cc0c2e4d08e54..bdb33d4f4ab27c4 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -927,6 +927,8 @@ struct AMDGPUStreamTy {
     AMDGPUSignalManagerTy *SignalManager;
   };
 
+  using AMDGPUStreamCallbackTy = Error(void *Data);
+
   /// The stream is composed of N stream's slots. The struct below represents
   /// the fields of each slot. Each slot has a signal and an optional action
   /// function. When appending an HSA asynchronous operation to the stream, one
@@ -942,65 +944,82 @@ struct AMDGPUStreamTy {
     /// operation as input signal.
     AMDGPUSignalTy *Signal;
 
-    /// The action that must be performed after the operation's completion. Set
+    /// The actions that must be performed after the operation's completion. Set
     /// to nullptr when there is no action to perform.
-    Error (*ActionFunction)(void *);
+    llvm::SmallVector<AMDGPUStreamCallbackTy *> Callbacks;
 
     /// Space for the action's arguments. A pointer to these arguments is passed
     /// to the action function. Notice the space of arguments is limited.
-    union {
+    union ActionArgsTy {
       MemcpyArgsTy MemcpyArgs;
       ReleaseBufferArgsTy ReleaseBufferArgs;
       ReleaseSignalArgsTy ReleaseSignalArgs;
-    } ActionArgs;
+      void *CallbackArgs;
+    };
+
+    llvm::SmallVector<ActionArgsTy> ActionArgs;
 
     /// Create an empty slot.
-    StreamSlotTy() : Signal(nullptr), ActionFunction(nullptr) {}
+    StreamSlotTy() : Signal(nullptr), Callbacks({}), ActionArgs({}) {}
 
     /// Schedule a host memory copy action on the slot.
     Error schedHostMemoryCopy(void *Dst, const void *Src, size_t Size) {
-      ActionFunction = memcpyAction;
-      ActionArgs.MemcpyArgs = MemcpyArgsTy{Dst, Src, Size};
+      Callbacks.emplace_back(memcpyAction);
+      ActionArgs.emplace_back().MemcpyArgs = MemcpyArgsTy{Dst, Src, Size};
       return Plugin::success();
     }
 
     /// Schedule a release buffer action on the slot.
     Error schedReleaseBuffer(void *Buffer, AMDGPUMemoryManagerTy &Manager) {
-      ActionFunction = releaseBufferAction;
-      ActionArgs.ReleaseBufferArgs = ReleaseBufferArgsTy{Buffer, &Manager};
+      Callbacks.emplace_back(releaseBufferAction);
+      ActionArgs.emplace_back().ReleaseBufferArgs =
+          ReleaseBufferArgsTy{Buffer, &Manager};
       return Plugin::success();
     }
 
     /// Schedule a signal release action on the slot.
     Error schedReleaseSignal(AMDGPUSignalTy *SignalToRelease,
                              AMDGPUSignalManagerTy *SignalManager) {
-      ActionFunction = releaseSignalAction;
-      ActionArgs.ReleaseSignalArgs =
+      Callbacks.emplace_back(releaseSignalAction);
+      ActionArgs.emplace_back().ReleaseSignalArgs =
           ReleaseSignalArgsTy{SignalToRelease, SignalManager};
       return Plugin::success();
     }
 
+    /// Register a callback to be called on completion.
+    Error schedCallback(AMDGPUStreamCallbackTy *Func, void *Data) {
+      Callbacks.emplace_back(Func);
+      ActionArgs.emplace_back().CallbackArgs = Data;
+
+      return Plugin::success();
+    }
+
     // Perform the action if needed.
     Error performAction() {
-      if (!ActionFunction)
+      if (Callbacks.empty())
         return Plugin::success();
 
-      // Perform the action.
-      if (ActionFunction == memcpyAction) {
-        if (auto Err = memcpyAction(&ActionArgs))
-          return Err;
-      } else if (ActionFunction == releaseBufferAction) {
-        if (auto Err = releaseBufferAction(&ActionArgs))
-          return Err;
-      } else if (ActionFunction == releaseSignalAction) {
-        if (auto Err = releaseSignalAction(&ActionArgs))
-          return Err;
-      } else {
-        return Plugin::error("Unknown action function!");
+      assert(Callbacks.size() == ActionArgs.size() && "Size mismatch");
+      for (auto [Callback, ActionArg] : llvm::zip(Callbacks, ActionArgs)) {
+        // Perform the action.
+        if (Callback == memcpyAction) {
+          if (auto Err = memcpyAction(&ActionArg))
+            return Err;
+        } else if (Callback == releaseBufferAction) {
+          if (auto Err = releaseBufferAction(&ActionArg))
+            return Err;
+        } else if (Callback == releaseSignalAction) {
+          if (auto Err = releaseSignalAction(&ActionArg))
+            return Err;
+        } else if (Callback) {
+          if (auto Err = Callback(ActionArg.CallbackArgs))
+            return Err;
+        }
       }
 
       // Invalidate the action.
-      ActionFunction = nullptr;
+      Callbacks.clear();
+      ActionArgs.clear();
 
       return Plugin::success();
     }
diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
index 439cc20963a1298..61c0bacc9f20629 100644
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -253,6 +253,17 @@ if(NOT WIN32)
       libiomp5${LIBOMP_LIBRARY_SUFFIX}
     WORKING_DIRECTORY ${LIBOMP_LIBRARY_DIR}
   )
+  if(LIBOMP_ENABLE_SHARED)
+    if(APPLE)
+      set(VERSIONED_LIBGOMP_NAME libgomp.1${LIBOMP_LIBRARY_SUFFIX})
+    else()
+      set(VERSIONED_LIBGOMP_NAME libgomp${LIBOMP_LIBRARY_SUFFIX}.1)
+    endif()
+    add_custom_command(TARGET omp POST_BUILD
+      COMMAND ${CMAKE_COMMAND} -E create_symlink ${LIBOMP_LIB_FILE} ${VERSIONED_LIBGOMP_NAME}
+      WORKING_DIRECTORY ${LIBOMP_LIBRARY_DIR}
+    )
+  endif()
 endif()
 
 # Definitions for testing, for reuse when testing libomptarget-nvptx.
@@ -439,13 +450,18 @@ else()
 
   if(${LIBOMP_INSTALL_ALIASES})
     # Create aliases (symlinks) of the library for backwards compatibility
+    extend_path(outdir "${CMAKE_INSTALL_PREFIX}" "${OPENMP_INSTALL_LIBDIR}")
     set(LIBOMP_ALIASES "libgomp;libiomp5")
     foreach(alias IN LISTS LIBOMP_ALIASES)
-      extend_path(outdir "${CMAKE_INSTALL_PREFIX}" "${OPENMP_INSTALL_LIBDIR}")
       install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E create_symlink \"${LIBOMP_LIB_FILE}\"
         \"${alias}${LIBOMP_LIBRARY_SUFFIX}\" WORKING_DIRECTORY
         \"\$ENV{DESTDIR}${outdir}\")")
     endforeach()
+    if(LIBOMP_ENABLE_SHARED)
+      install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E create_symlink \"${LIBOMP_LIB_FILE}\"
+        \"${VERSIONED_LIBGOMP_NAME}\" WORKING_DIRECTORY
+        \"\$ENV{DESTDIR}${outdir}\")")
+    endif()
   endif()
 endif()
 install(
diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt
index 67368dcedb3bf6d..830165c799c2ab9 100644
--- a/runtimes/CMakeLists.txt
+++ b/runtimes/CMakeLists.txt
@@ -239,23 +239,6 @@ foreach(entry ${runtimes})
 endforeach()
 
 if(LLVM_INCLUDE_TESTS)
-  # If built with the runtimes build (rooted at runtimes/CMakeLists.txt), we
-  # won't have llvm-lit. If built with the bootstrapping build (rooted at
-  # llvm/CMakeLists.txt), the top-level llvm CMake invocation already generated
-  # the llvm-lit script.
-  if (NOT HAVE_LLVM_LIT)
-    # Add lit before adding any runtimes since their CMake tests configuration
-    # might depend on lit being present.
-    set(LLVM_LIT_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/bin)
-    add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit
-                     ${CMAKE_CURRENT_BINARY_DIR}/llvm-lit)
-    # Ensure that the testsuites use the local lit rather than
-    # ${LLVM_INSTALL_DIR}/bin/llvm-lit (which may not exist if LLVM_BINARY_DIR
-    # points at an installed LLVM tree rather than a build tree).
-    get_llvm_lit_path(_base_dir _file_name)
-    set(LLVM_EXTERNAL_LIT "${_base_dir}/${_file_name}" CACHE STRING "Command used to spawn lit" FORCE)
-  endif()
-
   set(LIT_ARGS_DEFAULT "-sv --show-xfail --show-unsupported")
   if (MSVC OR XCODE)
     set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
@@ -289,6 +272,14 @@ if(LLVM_INCLUDE_TESTS)
   # and we know the total set of lit testsuites.
   umbrella_lit_testsuite_end(check-runtimes)
 
+  if (NOT HAVE_LLVM_LIT)
+    # If built by manually invoking cmake on this directory, we don't have
+    # llvm-lit. If invoked via llvm/runtimes, the toplevel llvm cmake
+    # invocation already generated the llvm-lit script.
+    add_subdirectory(${LLVM_MAIN_SRC_DIR}/utils/llvm-lit
+                     ${CMAKE_CURRENT_BINARY_DIR}/llvm-lit)
+  endif()
+
   get_property(LLVM_RUNTIMES_LIT_TESTSUITES GLOBAL PROPERTY LLVM_RUNTIMES_LIT_TESTSUITES)
   string(REPLACE ";" "\n" LLVM_RUNTIMES_LIT_TESTSUITES "${LLVM_RUNTIMES_LIT_TESTSUITES}")
   file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/lit.tests ${LLVM_RUNTIMES_LIT_TESTSUITES})
@@ -318,10 +309,10 @@ if(SUB_COMPONENTS)
   if(LLVM_RUNTIMES_TARGET)
     configure_file(
       ${CMAKE_CURRENT_SOURCE_DIR}/Components.cmake.in
-      ${CMAKE_CURRENT_BINARY_DIR}/runtimes/${LLVM_RUNTIMES_TARGET}/Components.cmake)
+      ${LLVM_BINARY_DIR}/runtimes/${LLVM_RUNTIMES_TARGET}/Components.cmake)
   else()
     configure_file(
       ${CMAKE_CURRENT_SOURCE_DIR}/Components.cmake.in
-      ${CMAKE_CURRENT_BINARY_DIR}/runtimes/Components.cmake)
+      ${LLVM_BINARY_DIR}/runtimes/Components.cmake)
   endif()
 endif()
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 774366d48a2161f..d4aeaea6fac845d 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -136,6 +136,17 @@ libc_support_library(
     hdrs = ["hdr/fenv_macros.h"],
 )
 
+libc_support_library(
+    name = "hdr_fcntl_macros",
+    hdrs = ["hdr/fcntl_macros.h"],
+    deps = [":hdr_fcntl_overlay"],
+)
+
+libc_support_library(
+    name = "hdr_fcntl_overlay",
+    hdrs = ["hdr/fcntl_overlay.h"],
+)
+
 libc_support_library(
     name = "hdr_signal_macros",
     hdrs = ["hdr/signal_macros.h"],
@@ -201,6 +212,12 @@ libc_support_library(
     hdrs = ["hdr/types/fexcept_t.h"],
 )
 
+libc_support_library(
+    name = "types_mode_t",
+    hdrs = ["hdr/types/mode_t.h"],
+    deps = [":hdr_fcntl_overlay"],
+)
+
 libc_support_library(
     name = "types_sigset_t",
     hdrs = ["hdr/types/sigset_t.h"],
@@ -3488,6 +3505,8 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
+        ":types_mode_t",
     ],
 )
 
@@ -3503,6 +3522,8 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
+        ":types_mode_t",
     ],
 )
 
@@ -3514,6 +3535,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
     ],
 )
 
@@ -3527,6 +3549,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
     ],
 )
 
@@ -3571,6 +3594,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
     ],
 )
 
@@ -3710,6 +3734,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
     ],
 )
 
@@ -3721,6 +3746,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
     ],
 )
 
@@ -3800,6 +3826,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
     ],
 )
 
@@ -3811,6 +3838,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
     ],
 )
 
@@ -3822,6 +3850,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
     ],
 )
 
@@ -3833,6 +3862,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
     ],
 )
 
@@ -3844,6 +3874,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
     ],
 )
 
@@ -3891,6 +3922,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
     ],
 )
 
@@ -3902,6 +3934,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
     ],
 )
 
@@ -4161,6 +4194,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
         ":hdr_stdio_overlay",
         ":types_FILE",
     ],
@@ -4178,6 +4212,7 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
         ":llvm_libc_macros_fcntl_macros",
     ],
 )
@@ -4192,6 +4227,8 @@ libc_function(
         ":__support_common",
         ":__support_osutil_syscall",
         ":errno",
+        ":hdr_fcntl_macros",
+        ":types_mode_t",
     ],
 )
 
diff --git a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
index f65da9e98226b62..96d7fa86e9ddf25 100644
--- a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
+++ b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
@@ -24,7 +24,7 @@ LIBC_CONFIGURE_OPTIONS = [
     # Documentation in libc/src/string/memory_utils/...
     # "LIBC_COPT_MEMCPY_USE_EMBEDDED_TINY",
     # "LIBC_COPT_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE",
-    # "LIBC_COPT_MEMCPY_X86_USE_SOFTWARE_PREFETCHING",
+    "LIBC_COPT_MEMCPY_X86_USE_SOFTWARE_PREFETCHING",
     "LIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING",
 
     # Documentation in libc/docs/dev/printf_behavior.rst
diff --git a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel
index 9dba2efc34f6139..91eb04db3ee9b53 100644
--- a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel
@@ -607,7 +607,10 @@ gentbl_cc_library(
 
 cc_library(
     name = "CoreHeaders",
-    hdrs = glob(["include/lldb/Core/**/*.h"]),
+    hdrs = glob([
+        "include/lldb/Core/**/*.h",
+        "include/lldb/ValueObject/**/*.h",  # This should be its own library.
+    ]),
     strip_include_prefix = "include",
     deps = [
         ":BreakpointHeaders",
@@ -627,8 +630,14 @@ cc_library(
 
 cc_library(
     name = "Core",
-    srcs = glob(["source/Core/**/*.cpp"]),
-    hdrs = glob(["include/lldb/Core/**/*.h"]),
+    srcs = glob([
+        "source/Core/**/*.cpp",
+        "source/ValueObject/**/*.cpp",  # This should be its own library.
+    ]),
+    hdrs = glob([
+        "include/lldb/Core/**/*.h",
+        "include/lldb/ValueObject/**/*.h",  # This should be its own library.
+    ]),
     strip_include_prefix = "include",
     deps = [
         ":BreakpointHeaders",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index 34beb758a12dd44..c69f793943beeca 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -547,8 +547,13 @@ cc_library(
         ":TestDialect",
         "//llvm:Support",
         "//mlir:FuncDialect",
+        "//mlir:GPUToSPIRV",
+        "//mlir:GPUTransforms",
         "//mlir:IR",
         "//mlir:Pass",
+        "//mlir:SPIRVDialect",
+        "//mlir:SPIRVToLLVM",
+        "//mlir:SPIRVTransforms",
         "//mlir:Support",
     ],
 )